## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following object is masked from 'package:kableExtra':
## 
##     group_rows
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: grid
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
## Loading required package: viridisLite
## 
## Attaching package: 'scales'
## The following object is masked from 'package:viridis':
## 
##     viridis_pal
## 
## Attaching package: 'readr'
## The following object is masked from 'package:scales':
## 
##     col_factor

Problem 1: Measuring Migration with the American Community Survey - 40 points

Using the tidycensus package, extract migration information about King County, Washington, and present your results in figures and maps.

To look at county-wide estimates we will make use of the ACS 1-year data accessed in various tidycensus functions by specifying dataset = “acs1” or survey = “acs1”.

To get Census tract-level estimates within King County we will make use of the ACS 5-year data accessed in various tidycensus functions by specifying dataset = “acs5” or survey = “acs5”.

Note: when you are using ACS 5-year data, the year argument in tidycensus functions will take the ending year of a 5-year period. For example, year = 2023 will be referring to the 5-year period 2019-2023.

# check if this works
source("CensusAPIKey.R")
census_api_key(key = myKey, install = TRUE, overwrite= TRUE)
## Your original .Renviron will be backed up and stored in your R HOME directory if needed.
## Your API key has been stored in your .Renviron and can be accessed by Sys.getenv("CENSUS_API_KEY"). 
## To use now, restart R or run `readRenviron("~/.Renviron")`
## [1] "5ca56c6f2ea26c7315b7fd6270ebf0edbbe41af7"
## Get list of all variables in 2023
acs1_vars_2023 <- load_variables(year = 2023, dataset = "acs1")

## Find migration "concepts"
acs1_vars_2023 %>% filter(grepl("GEOGRAPHICAL MOBILITY", concept,
ignore.case = TRUE)) %>%
  pull(concept) %>%
  unique()
##  [1] "Geographical Mobility in the Past Year by Age for Current Residence in Puerto Rico"                                                                                  
##  [2] "Geographical Mobility in the Past Year by Age for Current Residence in the United States"                                                                            
##  [3] "Median Age by Geographical Mobility in the Past Year for Current Residence in Puerto Rico"                                                                           
##  [4] "Median Age by Geographical Mobility in the Past Year for Current Residence in the United States"                                                                     
##  [5] "Geographical Mobility in the Past Year by Sex for Current Residence in Puerto Rico"                                                                                  
##  [6] "Geographical Mobility in the Past Year by Sex for Current Residence in the United States"                                                                            
##  [7] "Geographical Mobility in the Past Year (White Alone) for Current Residence in Puerto Rico"                                                                           
##  [8] "Geographical Mobility in the Past Year (White Alone) for Current Residence in the United States"                                                                     
##  [9] "Geographical Mobility in the Past Year (Black or African American Alone) for Current Residence in Puerto Rico"                                                       
## [10] "Geographical Mobility in the Past Year (Black or African American Alone) for Current Residence in the United States"                                                 
## [11] "Geographical Mobility in the Past Year (American Indian and Alaska Native Alone) for Current Residence in Puerto Rico"                                               
## [12] "Geographical Mobility in the Past Year (American Indian and Alaska Native Alone) for Current Residence in the United States"                                         
## [13] "Geographical Mobility in the Past Year (Asian Alone) for Current Residence in Puerto Rico"                                                                           
## [14] "Geographical Mobility in the Past Year (Asian Alone) for Current Residence in the United States"                                                                     
## [15] "Geographical Mobility in the Past Year (Native Hawaiian and Other Pacific Islander Alone) for Current Residence in Puerto Rico"                                      
## [16] "Geographical Mobility in the Past Year (Native Hawaiian and Other Pacific Islander Alone) for Current Residence in the United States"                                
## [17] "Geographical Mobility in the Past Year (Some Other Race Alone) for Current Residence in Puerto Rico"                                                                 
## [18] "Geographical Mobility in the Past Year (Some Other Race Alone) for Current Residence in the United States"                                                           
## [19] "Geographical Mobility in the Past Year (Two or More Races) for Current Residence in Puerto Rico"                                                                     
## [20] "Geographical Mobility in the Past Year (Two or More Races) for Current Residence in the United States"                                                               
## [21] "Geographical Mobility in the Past Year (White Alone, Not Hispanic or Latino) for Current Residence in Puerto Rico"                                                   
## [22] "Geographical Mobility in the Past Year (White Alone, Not Hispanic or Latino) for Current Residence in the United States"                                             
## [23] "Geographical Mobility in the Past Year (Hispanic or Latino) for Current Residence in Puerto Rico"                                                                    
## [24] "Geographical Mobility in the Past Year (Hispanic or Latino) for Current Residence in the United States"                                                              
## [25] "Geographical Mobility in the Past Year by Citizenship Status for Current Residence in Puerto Rico"                                                                   
## [26] "Geographical Mobility in the Past Year by Citizenship Status for Current Residence in the United States"                                                             
## [27] "Geographical Mobility in the Past Year by Marital Status for Current Residence in Puerto Rico"                                                                       
## [28] "Geographical Mobility in the Past Year by Marital Status for Current Residence in the United States"                                                                 
## [29] "Geographical Mobility in the Past Year by Educational Attainment for Current Residence in Puerto Rico"                                                               
## [30] "Geographical Mobility in the Past Year by Educational Attainment for Current Residence in the United States"                                                         
## [31] "Geographical Mobility in the Past Year by Individual Income in the Past 12 Months (in 2023 Inflation-Adjusted Dollars) for Current Residence in Puerto Rico"         
## [32] "Geographical Mobility in the Past Year by Individual Income in the Past 12 Months (in 2023 Inflation-Adjusted Dollars) for Current Residence in the United States"   
## [33] "Median Income in the Past 12 Months (in 2023 Inflation-Adjusted Dollars) by Geographical Mobility in the Past Year for Current Residence in Puerto Rico"             
## [34] "Median Income in the Past 12 Months (in 2023 Inflation-Adjusted Dollars) by Geographical Mobility in the Past Year for Current Residence in the United States"       
## [35] "Geographical Mobility in the Past Year by Poverty Status in the Past 12 Months for Current Residence in Puerto Rico"                                                 
## [36] "Geographical Mobility in the Past Year by Poverty Status in the Past 12 Months for Current Residence in the United States"                                           
## [37] "Geographical Mobility in the Past Year by Tenure for Current Residence in Puerto Rico"                                                                               
## [38] "Geographical Mobility in the Past Year by Tenure for Current Residence in the United States"                                                                         
## [39] "Geographical Mobility in the Past Year for Current Residence--Metropolitan Statistical Area Level in Puerto Rico"                                                    
## [40] "Geographical Mobility in the Past Year for Current Residence--Metropolitan Statistical Area Level in the United States"                                              
## [41] "Geographical Mobility in the Past Year for Current Residence--Micropolitan Statistical Area Level in Puerto Rico"                                                    
## [42] "Geographical Mobility in the Past Year for Current Residence--Micropolitan Statistical Area Level in the United States"                                              
## [43] "Geographical Mobility in the Past Year for Current Residence--Not Metropolitan or Micropolitan Statistical Area Level in the United States"                          
## [44] "Geographical Mobility in the Past Year for Current Residence--State, County and Place Level in Puerto Rico"                                                          
## [45] "Geographical Mobility in the Past Year for Current Residence--State, County and Place Level in the United States"                                                    
## [46] "Geographical Mobility in the Past Year by Age for Residence 1 Year Ago in Puerto Rico"                                                                               
## [47] "Geographical Mobility in the Past Year by Age for Residence 1 Year Ago in the United States"                                                                         
## [48] "Median Age by Geographical Mobility in the Past Year for Residence 1 Year Ago in Puerto Rico"                                                                        
## [49] "Median Age by Geographical Mobility in the Past Year for Residence 1 Year Ago in the United States"                                                                  
## [50] "Geographical Mobility in the Past Year by Sex for Residence 1 Year Ago in Puerto Rico"                                                                               
## [51] "Geographical Mobility in the Past Year by Sex for Residence 1 Year Ago in the United States"                                                                         
## [52] "Geographical Mobility in the Past Year (White Alone) for Residence 1 Year Ago in Puerto Rico"                                                                        
## [53] "Geographical Mobility in the Past Year (White Alone) for Residence 1 Year Ago in the United States"                                                                  
## [54] "Geographical Mobility in the Past Year (Black or African American Alone) for Residence 1 Year Ago in Puerto Rico"                                                    
## [55] "Geographical Mobility in the Past Year (Black or African American Alone) for Residence 1 Year Ago in the United States"                                              
## [56] "Geographical Mobility in the Past Year (American Indian and Alaska Native Alone) for Residence 1 Year Ago in Puerto Rico"                                            
## [57] "Geographical Mobility in the Past Year (American Indian and Alaska Native Alone) for Residence 1 Year Ago in the United States"                                      
## [58] "Geographical Mobility in the Past Year (Asian Alone) for Residence 1 Year Ago in Puerto Rico"                                                                        
## [59] "Geographical Mobility in the Past Year (Asian Alone) for Residence 1 Year Ago in the United States"                                                                  
## [60] "Geographical Mobility in the Past Year (Native Hawaiian and Other Pacific Islander Alone) for Residence 1 Year Ago in Puerto Rico"                                   
## [61] "Geographical Mobility in the Past Year (Native Hawaiian and Other Pacific Islander Alone) for Residence 1 Year Ago in the United States"                             
## [62] "Geographical Mobility in the Past Year (Some Other Race Alone) for Residence 1 Year Ago in Puerto Rico"                                                              
## [63] "Geographical Mobility in the Past Year (Some Other Race Alone) for Residence 1 Year Ago in the United States"                                                        
## [64] "Geographical Mobility in the Past Year (Two or More Races) for Residence 1 Year Ago in Puerto Rico"                                                                  
## [65] "Geographical Mobility in the Past Year (Two or More Races) for Residence 1 Year Ago in the United States"                                                            
## [66] "Geographical Mobility in the Past Year (White Alone, Not Hispanic or Latino) for Residence 1 Year Ago in Puerto Rico"                                                
## [67] "Geographical Mobility in the Past Year (White Alone, Not Hispanic or Latino) for Residence 1 Year Ago in the United States"                                          
## [68] "Geographical Mobility in the Past Year (Hispanic or Latino) for Residence 1 Year Ago in Puerto Rico"                                                                 
## [69] "Geographical Mobility in the Past Year (Hispanic or Latino) for Residence 1 Year Ago in the United States"                                                           
## [70] "Geographical Mobility in the Past Year by Citizenship Status for Residence 1 Year Ago in Puerto Rico"                                                                
## [71] "Geographical Mobility in the Past Year by Citizenship Status for Residence 1 Year Ago in the United States"                                                          
## [72] "Geographical Mobility in the Past Year by Marital Status for Residence 1 Year Ago in Puerto Rico"                                                                    
## [73] "Geographical Mobility in the Past Year by Marital Status for Residence 1 Year Ago in the United States"                                                              
## [74] "Geographical Mobility in the Past Year by Educational Attainment for Residence 1 Year Ago in Puerto Rico"                                                            
## [75] "Geographical Mobility in the Past Year by Educational Attainment for Residence 1 Year Ago in the United States"                                                      
## [76] "Geographical Mobility in the Past Year by Individual Income in the Past 12 Months (in 2023 Inflation-Adjusted Dollars) for Residence 1 Year Ago in Puerto Rico"      
## [77] "Geographical Mobility in the Past Year by Individual Income in the Past 12 Months (in 2023 Inflation-Adjusted Dollars) for Residence 1 Year Ago in the United States"
## [78] "Median Income in the Past 12 Months (in 2023 Inflation-Adjusted Dollars) by Geographical Mobility in the Past Year for Residence 1 Year Ago in Puerto Rico"          
## [79] "Median Income in the Past 12 Months (in 2023 Inflation-Adjusted Dollars) by Geographical Mobility in the Past Year for Residence 1 Year Ago in the United States"    
## [80] "Geographical Mobility in the Past Year by Poverty Status in the Past 12 Months for Residence 1 Year Ago in Puerto Rico"                                              
## [81] "Geographical Mobility in the Past Year by Poverty Status in the Past 12 Months for Residence 1 Year Ago in the United States"                                        
## [82] "Geographical Mobility in the Past Year by Tenure for Residence 1 Year Ago in Puerto Rico"                                                                            
## [83] "Geographical Mobility in the Past Year by Tenure for Residence 1 Year Ago in the United States"                                                                      
## [84] "Group Quarters Type (3 Types) by Geographical Mobility in the Past Year for Current Residence in the United States"                                                  
## [85] "Group Quarters Type (5 Types) by Geographical Mobility in the Past Year for Current Residence in the United States"
## Grab variables related to concept of interest
acs1_vars_2023 %>%
filter(concept == paste0("Geographical Mobility in the Past Year ",
                         "by Age for Current Residence in the ",
                         "United States"))
## # A tibble: 156 × 3
##    name       label                             concept                         
##    <chr>      <chr>                             <chr>                           
##  1 B07001_001 Estimate!!Total:                  Geographical Mobility in the Pa…
##  2 B07001_002 Estimate!!Total:!!1 to 4 years    Geographical Mobility in the Pa…
##  3 B07001_003 Estimate!!Total:!!5 to 17 years   Geographical Mobility in the Pa…
##  4 B07001_004 Estimate!!Total:!!18 and 19 years Geographical Mobility in the Pa…
##  5 B07001_005 Estimate!!Total:!!20 to 24 years  Geographical Mobility in the Pa…
##  6 B07001_006 Estimate!!Total:!!25 to 29 years  Geographical Mobility in the Pa…
##  7 B07001_007 Estimate!!Total:!!30 to 34 years  Geographical Mobility in the Pa…
##  8 B07001_008 Estimate!!Total:!!35 to 39 years  Geographical Mobility in the Pa…
##  9 B07001_009 Estimate!!Total:!!40 to 44 years  Geographical Mobility in the Pa…
## 10 B07001_010 Estimate!!Total:!!45 to 49 years  Geographical Mobility in the Pa…
## # ℹ 146 more rows
## Get list of all variables in 2012
acs1_vars_2012 <- load_variables(2012, dataset = "acs1")

## Find migration "concepts"
acs1_vars_2012 %>% filter(grepl("GEOGRAPHICAL MOBILITY", concept,
ignore.case = TRUE)) %>%
  pull(concept) %>%
  unique()
##  [1] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY AGE FOR CURRENT RESIDENCE IN PUERTO RICO"                                                                                  
##  [2] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY AGE FOR CURRENT RESIDENCE IN THE UNITED STATES"                                                                            
##  [3] "MEDIAN AGE BY GEOGRAPHICAL MOBILITY IN THE PAST YEAR FOR CURRENT RESIDENCE IN PUERTO RICO"                                                                           
##  [4] "MEDIAN AGE BY GEOGRAPHICAL MOBILITY IN THE PAST YEAR FOR CURRENT RESIDENCE IN THE UNITED STATES"                                                                     
##  [5] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY SEX FOR CURRENT RESIDENCE IN PUERTO RICO"                                                                                  
##  [6] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY SEX FOR CURRENT RESIDENCE IN THE UNITED STATES"                                                                            
##  [7] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (WHITE ALONE) FOR CURRENT RESIDENCE IN PUERTO RICO"                                                                           
##  [8] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (WHITE ALONE) FOR CURRENT RESIDENCE IN THE UNITED STATES"                                                                     
##  [9] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (BLACK OR AFRICAN AMERICAN ALONE) FOR CURRENT RESIDENCE IN PUERTO RICO"                                                       
## [10] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (BLACK OR AFRICAN AMERICAN ALONE) FOR CURRENT RESIDENCE IN THE UNITED STATES"                                                 
## [11] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (AMERICAN INDIAN AND ALASKA NATIVE ALONE) FOR CURRENT RESIDENCE IN THE UNITED STATES"                                         
## [12] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (ASIAN ALONE) FOR CURRENT RESIDENCE IN PUERTO RICO"                                                                           
## [13] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (ASIAN ALONE) FOR CURRENT RESIDENCE IN THE UNITED STATES"                                                                     
## [14] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (NATIVE HAWAIIAN AND OTHER PACIFIC ISLANDER ALONE) FOR CURRENT RESIDENCE IN THE UNITED STATES"                                
## [15] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (SOME OTHER RACE ALONE) FOR CURRENT RESIDENCE IN PUERTO RICO"                                                                 
## [16] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (SOME OTHER RACE ALONE) FOR CURRENT RESIDENCE IN THE UNITED STATES"                                                           
## [17] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (TWO OR MORE RACES) FOR CURRENT RESIDENCE IN PUERTO RICO"                                                                     
## [18] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (TWO OR MORE RACES) FOR CURRENT RESIDENCE IN THE UNITED STATES"                                                               
## [19] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (WHITE ALONE, NOT HISPANIC OR LATINO) FOR CURRENT RESIDENCE IN PUERTO RICO"                                                   
## [20] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (WHITE ALONE, NOT HISPANIC OR LATINO) FOR CURRENT RESIDENCE IN THE UNITED STATES"                                             
## [21] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (HISPANIC OR LATINO) FOR CURRENT RESIDENCE IN PUERTO RICO"                                                                    
## [22] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (HISPANIC OR LATINO) FOR CURRENT RESIDENCE IN THE UNITED STATES"                                                              
## [23] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY CITIZENSHIP STATUS FOR CURRENT RESIDENCE IN PUERTO RICO"                                                                   
## [24] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY CITIZENSHIP STATUS FOR CURRENT RESIDENCE IN THE UNITED STATES"                                                             
## [25] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY MARITAL STATUS FOR CURRENT RESIDENCE IN PUERTO RICO"                                                                       
## [26] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY MARITAL STATUS FOR CURRENT RESIDENCE IN THE UNITED STATES"                                                                 
## [27] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY EDUCATIONAL ATTAINMENT FOR CURRENT RESIDENCE IN PUERTO RICO"                                                               
## [28] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY EDUCATIONAL ATTAINMENT FOR CURRENT RESIDENCE IN THE UNITED STATES"                                                         
## [29] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY INDIVIDUAL INCOME IN THE PAST 12 MONTHS (IN 2012 INFLATION-ADJUSTED DOLLARS) FOR CURRENT RESIDENCE IN PUERTO RICO"         
## [30] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY INDIVIDUAL INCOME IN THE PAST 12 MONTHS (IN 2012 INFLATION-ADJUSTED DOLLARS) FOR CURRENT RESIDENCE IN THE UNITED STATES"   
## [31] "MEDIAN INCOME IN THE PAST 12 MONTHS (IN 2012 INFLATION-ADJUSTED DOLLARS) BY GEOGRAPHICAL MOBILITY IN THE PAST YEAR FOR CURRENT RESIDENCE IN PUERTO RICO"             
## [32] "MEDIAN INCOME IN THE PAST 12 MONTHS (IN 2012 INFLATION-ADJUSTED DOLLARS) BY GEOGRAPHICAL MOBILITY IN THE PAST YEAR FOR CURRENT RESIDENCE IN THE UNITED STATES"       
## [33] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY POVERTY STATUS IN THE PAST 12 MONTHS FOR CURRENT RESIDENCE IN PUERTO RICO"                                                 
## [34] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY POVERTY STATUS IN THE PAST 12 MONTHS FOR CURRENT RESIDENCE IN THE UNITED STATES"                                           
## [35] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY TENURE FOR CURRENT RESIDENCE IN PUERTO RICO"                                                                               
## [36] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY TENURE FOR CURRENT RESIDENCE IN THE UNITED STATES"                                                                         
## [37] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR FOR CURRENT RESIDENCE--METROPOLITAN STATISTICAL AREA LEVEL IN PUERTO RICO"                                                    
## [38] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR FOR CURRENT RESIDENCE--METROPOLITAN STATISTICAL AREA LEVEL IN THE UNITED STATES"                                              
## [39] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR FOR CURRENT RESIDENCE--MICROPOLITAN STATISTICAL AREA LEVEL IN PUERTO RICO"                                                    
## [40] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR FOR CURRENT RESIDENCE--MICROPOLITAN STATISTICAL AREA LEVEL IN THE UNITED STATES"                                              
## [41] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR FOR CURRENT RESIDENCE--NOT METROPOLITAN OR MICROPOLITAN STATISTICAL AREA LEVEL IN THE UNITED STATES"                          
## [42] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR FOR CURRENT RESIDENCE--STATE, COUNTY AND PLACE LEVEL IN PUERTO RICO"                                                          
## [43] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR FOR CURRENT RESIDENCE--STATE, COUNTY AND PLACE LEVEL IN THE UNITED STATES"                                                    
## [44] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY AGE FOR RESIDENCE 1 YEAR AGO IN PUERTO RICO"                                                                               
## [45] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY AGE FOR RESIDENCE 1 YEAR AGO IN THE UNITED STATES"                                                                         
## [46] "MEDIAN AGE BY GEOGRAPHICAL MOBILITY IN THE PAST YEAR FOR RESIDENCE 1 YEAR AGO IN PUERTO RICO"                                                                        
## [47] "MEDIAN AGE BY GEOGRAPHICAL MOBILITY IN THE PAST YEAR FOR RESIDENCE 1 YEAR AGO IN THE UNITED STATES"                                                                  
## [48] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY SEX FOR RESIDENCE 1 YEAR AGO IN PUERTO RICO"                                                                               
## [49] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY SEX FOR RESIDENCE 1 YEAR AGO IN THE UNITED STATES"                                                                         
## [50] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (WHITE ALONE) FOR RESIDENCE 1 YEAR AGO IN PUERTO RICO"                                                                        
## [51] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (WHITE ALONE) FOR RESIDENCE 1 YEAR AGO IN THE UNITED STATES"                                                                  
## [52] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (BLACK OR AFRICAN AMERICAN ALONE) FOR RESIDENCE 1 YEAR AGO IN PUERTO RICO"                                                    
## [53] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (BLACK OR AFRICAN AMERICAN ALONE) FOR RESIDENCE 1 YEAR AGO IN THE UNITED STATES"                                              
## [54] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (AMERICAN INDIAN AND ALASKA NATIVE ALONE) FOR RESIDENCE 1 YEAR AGO IN PUERTO RICO"                                            
## [55] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (AMERICAN INDIAN AND ALASKA NATIVE ALONE) FOR RESIDENCE 1 YEAR AGO IN THE UNITED STATES"                                      
## [56] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (ASIAN ALONE) FOR RESIDENCE 1 YEAR AGO IN PUERTO RICO"                                                                        
## [57] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (ASIAN ALONE) FOR RESIDENCE 1 YEAR AGO IN THE UNITED STATES"                                                                  
## [58] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (NATIVE HAWAIIAN AND OTHER PACIFIC ISLANDER ALONE) FOR RESIDENCE 1 YEAR AGO IN THE UNITED STATES"                             
## [59] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (SOME OTHER RACE ALONE) FOR RESIDENCE 1 YEAR AGO IN PUERTO RICO"                                                              
## [60] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (SOME OTHER RACE ALONE) FOR RESIDENCE 1 YEAR AGO IN THE UNITED STATES"                                                        
## [61] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (TWO OR MORE RACES) FOR RESIDENCE 1 YEAR AGO IN PUERTO RICO"                                                                  
## [62] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (TWO OR MORE RACES) FOR RESIDENCE 1 YEAR AGO IN THE UNITED STATES"                                                            
## [63] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (WHITE ALONE, NOT HISPANIC OR LATINO) FOR RESIDENCE 1 YEAR AGO IN PUERTO RICO"                                                
## [64] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (WHITE ALONE, NOT HISPANIC OR LATINO) FOR RESIDENCE 1 YEAR AGO IN THE UNITED STATES"                                          
## [65] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (HISPANIC OR LATINO) FOR RESIDENCE 1 YEAR AGO IN PUERTO RICO"                                                                 
## [66] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR (HISPANIC OR LATINO) FOR RESIDENCE 1 YEAR AGO IN THE UNITED STATES"                                                           
## [67] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY CITIZENSHIP STATUS FOR RESIDENCE 1 YEAR AGO IN PUERTO RICO"                                                                
## [68] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY CITIZENSHIP STATUS FOR RESIDENCE 1 YEAR AGO IN THE UNITED STATES"                                                          
## [69] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY MARITAL STATUS FOR RESIDENCE 1 YEAR AGO IN PUERTO RICO"                                                                    
## [70] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY MARITAL STATUS FOR RESIDENCE 1 YEAR AGO IN THE UNITED STATES"                                                              
## [71] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY EDUCATIONAL ATTAINMENT FOR RESIDENCE 1 YEAR AGO IN PUERTO RICO"                                                            
## [72] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY EDUCATIONAL ATTAINMENT FOR RESIDENCE 1 YEAR AGO IN THE UNITED STATES"                                                      
## [73] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY INDIVIDUAL INCOME IN THE PAST 12 MONTHS (IN 2012 INFLATION-ADJUSTED DOLLARS) FOR RESIDENCE 1 YEAR AGO IN PUERTO RICO"      
## [74] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY INDIVIDUAL INCOME IN THE PAST 12 MONTHS (IN 2012 INFLATION-ADJUSTED DOLLARS) FOR RESIDENCE 1 YEAR AGO IN THE UNITED STATES"
## [75] "MEDIAN INCOME IN THE PAST 12 MONTHS (IN 2012 INFLATION-ADJUSTED DOLLARS) BY GEOGRAPHICAL MOBILITY IN THE PAST YEAR FOR RESIDENCE 1 YEAR AGO IN PUERTO RICO"          
## [76] "MEDIAN INCOME IN THE PAST 12 MONTHS (IN 2012 INFLATION-ADJUSTED DOLLARS) BY GEOGRAPHICAL MOBILITY IN THE PAST YEAR FOR RESIDENCE 1 YEAR AGO IN THE UNITED STATES"    
## [77] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY POVERTY STATUS IN THE PAST 12 MONTHS FOR RESIDENCE 1 YEAR AGO IN PUERTO RICO"                                              
## [78] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY POVERTY STATUS IN THE PAST 12 MONTHS FOR RESIDENCE 1 YEAR AGO IN THE UNITED STATES"                                        
## [79] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY TENURE FOR RESIDENCE 1 YEAR AGO IN PUERTO RICO"                                                                            
## [80] "GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY TENURE FOR RESIDENCE 1 YEAR AGO IN THE UNITED STATES"
## Are variable identifiers the same in 2012? Yes
acs1_vars_2012 %>%
filter(concept == toupper(paste0("Geographical Mobility in the Past Year ",
                                 "by Age for Current Residence in the ",
                                 "United States")))
## # A tibble: 156 × 3
##    name       label                            concept                          
##    <chr>      <chr>                            <chr>                            
##  1 B07001_001 Estimate!!Total                  GEOGRAPHICAL MOBILITY IN THE PAS…
##  2 B07001_002 Estimate!!Total!!1 to 4 years    GEOGRAPHICAL MOBILITY IN THE PAS…
##  3 B07001_003 Estimate!!Total!!5 to 17 years   GEOGRAPHICAL MOBILITY IN THE PAS…
##  4 B07001_004 Estimate!!Total!!18 and 19 years GEOGRAPHICAL MOBILITY IN THE PAS…
##  5 B07001_005 Estimate!!Total!!20 to 24 years  GEOGRAPHICAL MOBILITY IN THE PAS…
##  6 B07001_006 Estimate!!Total!!25 to 29 years  GEOGRAPHICAL MOBILITY IN THE PAS…
##  7 B07001_007 Estimate!!Total!!30 to 34 years  GEOGRAPHICAL MOBILITY IN THE PAS…
##  8 B07001_008 Estimate!!Total!!35 to 39 years  GEOGRAPHICAL MOBILITY IN THE PAS…
##  9 B07001_009 Estimate!!Total!!40 to 44 years  GEOGRAPHICAL MOBILITY IN THE PAS…
## 10 B07001_010 Estimate!!Total!!45 to 49 years  GEOGRAPHICAL MOBILITY IN THE PAS…
## # ℹ 146 more rows
## To pull a whole table of variables
kc_mig_2023 <- get_acs(geography = "county",
                       table = "B07001",
                       year = 2023,
                       state = "53",
                       county = "033",
                       survey = "acs1") %>%
              left_join(acs1_vars_2023, by = c("variable" = "name"))
## Getting data from the 2023 1-year ACS
## The 1-year ACS provides data for geographies with populations of 65,000 and greater.
## To get a specific small set of variables
# kc_mig_2023_tract <- get_acs(geography = "tract",
#                        variables = c("B07001_001", "B07001_017",
#                                      "B07001_033", "B07001_049",
#                                      "B07001_065", "B07001_081"),
#                        year = 2023,
#                        state = "53", #FIPS code for washington
#                        county = "033", #FIPS code for king county
#                        survey = "acs1") %>%
#               left_join(acs1_vars_2023, by = c("variable" = "name"))

## To get 5-year tract-level variables
## change geography = "tract"
## survey = "acs5"

##(a) 10 points

In a table, present the number of individuals in King County in 2012 and 2023 who moved: 1) from a different county within the same state, 2) from a different state, and 3) from abroad. If there are non-missing values of the moe column, also present 90% confidence intervals which can be constructed as estimate ± moe.

This answer uses code from SOC/CSSS/CSDE 533 Template of HW #5 provided by Jess Godwin.

The estimates and CI values of those moving within the same county will also be included for the purposes of comparing with the tract-level data in part (d).

kc_mig_2012_spec <- get_acs(geography = "county",
                       variables = c("B07001_001", "B07001_017",
                                     "B07001_033", "B07001_049",
                                     "B07001_065", "B07001_081"),
                       year = 2012,
                       state = "53", #FIPS code for washington
                       county = "033", #FIPS code for king county
                       survey = "acs1") %>%
              left_join(acs1_vars_2023, by = c("variable" = "name"))
## Getting data from the 2012 1-year ACS
## The 1-year ACS provides data for geographies with populations of 65,000 and greater.
kc_mig_2023_spec <- get_acs(geography = "county",
                       variables = c("B07001_001", "B07001_017",
                                     "B07001_033", "B07001_049",
                                     "B07001_065", "B07001_081"),
                       year = 2023,
                       state = "53", #FIPS code for washington
                       county = "033", #FIPS code for king county
                       survey = "acs1") %>%
              left_join(acs1_vars_2023, by = c("variable" = "name"))
## Getting data from the 2023 1-year ACS
## The 1-year ACS provides data for geographies with populations of 65,000 and greater.
kc_mig_2023_spec
## # A tibble: 6 × 7
##   GEOID NAME                    variable   estimate   moe label          concept
##   <chr> <chr>                   <chr>         <dbl> <dbl> <chr>          <chr>  
## 1 53033 King County, Washington B07001_001  2248046  2394 Estimate!!Tot… Geogra…
## 2 53033 King County, Washington B07001_017  1876645 16739 Estimate!!Tot… Geogra…
## 3 53033 King County, Washington B07001_033   217396 12948 Estimate!!Tot… Geogra…
## 4 53033 King County, Washington B07001_049    37684  5534 Estimate!!Tot… Geogra…
## 5 53033 King County, Washington B07001_065    79927  7485 Estimate!!Tot… Geogra…
## 6 53033 King County, Washington B07001_081    36394  5008 Estimate!!Tot… Geogra…
acs1_vars_2023
## # A tibble: 36,699 × 3
##    name        label                                    concept                 
##    <chr>       <chr>                                    <chr>                   
##  1 B01001A_001 Estimate!!Total:                         Sex by Age (White Alone)
##  2 B01001A_002 Estimate!!Total:!!Male:                  Sex by Age (White Alone)
##  3 B01001A_003 Estimate!!Total:!!Male:!!Under 5 years   Sex by Age (White Alone)
##  4 B01001A_004 Estimate!!Total:!!Male:!!5 to 9 years    Sex by Age (White Alone)
##  5 B01001A_005 Estimate!!Total:!!Male:!!10 to 14 years  Sex by Age (White Alone)
##  6 B01001A_006 Estimate!!Total:!!Male:!!15 to 17 years  Sex by Age (White Alone)
##  7 B01001A_007 Estimate!!Total:!!Male:!!18 and 19 years Sex by Age (White Alone)
##  8 B01001A_008 Estimate!!Total:!!Male:!!20 to 24 years  Sex by Age (White Alone)
##  9 B01001A_009 Estimate!!Total:!!Male:!!25 to 29 years  Sex by Age (White Alone)
## 10 B01001A_010 Estimate!!Total:!!Male:!!30 to 34 years  Sex by Age (White Alone)
## # ℹ 36,689 more rows
acs1_vars_2012
## # A tibble: 34,365 × 3
##    name        label                                  concept                   
##    <chr>       <chr>                                  <chr>                     
##  1 B00001_001  Estimate!!Total                        UNWEIGHTED SAMPLE COUNT O…
##  2 B00002_001  Estimate!!Total                        UNWEIGHTED SAMPLE HOUSING…
##  3 B01001A_001 Estimate!!Total                        SEX BY AGE (WHITE ALONE)  
##  4 B01001A_002 Estimate!!Total!!Male                  SEX BY AGE (WHITE ALONE)  
##  5 B01001A_003 Estimate!!Total!!Male!!Under 5 years   SEX BY AGE (WHITE ALONE)  
##  6 B01001A_004 Estimate!!Total!!Male!!5 to 9 years    SEX BY AGE (WHITE ALONE)  
##  7 B01001A_005 Estimate!!Total!!Male!!10 to 14 years  SEX BY AGE (WHITE ALONE)  
##  8 B01001A_006 Estimate!!Total!!Male!!15 to 17 years  SEX BY AGE (WHITE ALONE)  
##  9 B01001A_007 Estimate!!Total!!Male!!18 and 19 years SEX BY AGE (WHITE ALONE)  
## 10 B01001A_008 Estimate!!Total!!Male!!20 to 24 years  SEX BY AGE (WHITE ALONE)  
## # ℹ 34,355 more rows
#age by sex is B01001_001 to B01001_049
kc_mig_2023 = kc_mig_2023 %>%
  select(-GEOID, -NAME)

kc_mig_2023
## # A tibble: 96 × 5
##    variable   estimate   moe label                             concept          
##    <chr>         <dbl> <dbl> <chr>                             <chr>            
##  1 B07001_001  2248046  2394 Estimate!!Total:                  Geographical Mob…
##  2 B07001_002    91105  2396 Estimate!!Total:!!1 to 4 years    Geographical Mob…
##  3 B07001_003   320411    NA Estimate!!Total:!!5 to 17 years   Geographical Mob…
##  4 B07001_004    48672   894 Estimate!!Total:!!18 and 19 years Geographical Mob…
##  5 B07001_005   134584   895 Estimate!!Total:!!20 to 24 years  Geographical Mob…
##  6 B07001_006   193904    NA Estimate!!Total:!!25 to 29 years  Geographical Mob…
##  7 B07001_007   218307  1135 Estimate!!Total:!!30 to 34 years  Geographical Mob…
##  8 B07001_008   199507  6057 Estimate!!Total:!!35 to 39 years  Geographical Mob…
##  9 B07001_009   168343  6027 Estimate!!Total:!!40 to 44 years  Geographical Mob…
## 10 B07001_010   146575  1141 Estimate!!Total:!!45 to 49 years  Geographical Mob…
## # ℹ 86 more rows
kc_mig_2023_spec
## # A tibble: 6 × 7
##   GEOID NAME                    variable   estimate   moe label          concept
##   <chr> <chr>                   <chr>         <dbl> <dbl> <chr>          <chr>  
## 1 53033 King County, Washington B07001_001  2248046  2394 Estimate!!Tot… Geogra…
## 2 53033 King County, Washington B07001_017  1876645 16739 Estimate!!Tot… Geogra…
## 3 53033 King County, Washington B07001_033   217396 12948 Estimate!!Tot… Geogra…
## 4 53033 King County, Washington B07001_049    37684  5534 Estimate!!Tot… Geogra…
## 5 53033 King County, Washington B07001_065    79927  7485 Estimate!!Tot… Geogra…
## 6 53033 King County, Washington B07001_081    36394  5008 Estimate!!Tot… Geogra…

Variables to use: 1) variable = B07001_049; label = “Estimate!!Total:!!Moved from different county within same state:” 2) variable = B07001_065; label = “Estimate!!Total:!!Moved from different state:” 3) variable = B07001_081; label = “Estimate!!Total:!!Moved from abroad:”

# grab the data
diff_county_2012 = kc_mig_2012_spec %>% 
  filter(variable %in% c('B07001_033', 'B07001_049', 'B07001_065', 'B07001_081')) %>% 
  select(-GEOID, -NAME, -variable, -concept) %>% 
  mutate(Year = 2012)

diff_county_2023 = kc_mig_2023_spec %>% 
  filter(variable %in% c('B07001_033', 'B07001_049', 'B07001_065', 'B07001_081')) %>% 
  select(-GEOID, -NAME, -variable, -concept) %>% 
  mutate(Year = 2023)

# make the upper and lower confidence interval bounds
kc_county_2012_2023 <- bind_rows(diff_county_2012, diff_county_2023) %>%
  mutate(
    lower_CI = estimate - 1.645 * moe,
    upper_CI = estimate + 1.645 * moe,
    width_CI = upper_CI - lower_CI) 

kc_county_2012_2023 = kc_county_2012_2023 %>% 
  select(Year, estimate, moe, label, lower_CI, upper_CI, width_CI)

kc_county_2012_2023_wide <- kc_county_2012_2023 %>%
  pivot_wider(
    names_from = label,
    values_from = c(estimate, moe, lower_CI, upper_CI, width_CI)
  ) %>%
  select(Year, everything())

kc_county_2012_2023_wide
## # A tibble: 2 × 21
##    Year estimate_Estimate!!Total…¹ estimate_Estimate!!T…² estimate_Estimate!!T…³
##   <dbl>                      <dbl>                  <dbl>                  <dbl>
## 1  2012                     230287                  39284                  76978
## 2  2023                     217396                  37684                  79927
## # ℹ abbreviated names: ¹​`estimate_Estimate!!Total:!!Moved within same county:`,
## #   ²​`estimate_Estimate!!Total:!!Moved from different county within same state:`,
## #   ³​`estimate_Estimate!!Total:!!Moved from different state:`
## # ℹ 17 more variables: `estimate_Estimate!!Total:!!Moved from abroad:` <dbl>,
## #   `moe_Estimate!!Total:!!Moved within same county:` <dbl>,
## #   `moe_Estimate!!Total:!!Moved from different county within same state:` <dbl>,
## #   `moe_Estimate!!Total:!!Moved from different state:` <dbl>, …
kable(kc_county_2012_2023_wide, 
      digits = 2,
      caption = "Migration Granularity in King County (2012 vs. 2023)") %>%
  kable_styling(font_size = 15,
                bootstrap_options = c("striped", "condensed")) %>%
  scroll_box(width = "1000px", height = "400px")
Migration Granularity in King County (2012 vs. 2023)
Year estimate_Estimate!!Total:!!Moved within same county: estimate_Estimate!!Total:!!Moved from different county within same state: estimate_Estimate!!Total:!!Moved from different state: estimate_Estimate!!Total:!!Moved from abroad: moe_Estimate!!Total:!!Moved within same county: moe_Estimate!!Total:!!Moved from different county within same state: moe_Estimate!!Total:!!Moved from different state: moe_Estimate!!Total:!!Moved from abroad: lower_CI_Estimate!!Total:!!Moved within same county: lower_CI_Estimate!!Total:!!Moved from different county within same state: lower_CI_Estimate!!Total:!!Moved from different state: lower_CI_Estimate!!Total:!!Moved from abroad: upper_CI_Estimate!!Total:!!Moved within same county: upper_CI_Estimate!!Total:!!Moved from different county within same state: upper_CI_Estimate!!Total:!!Moved from different state: upper_CI_Estimate!!Total:!!Moved from abroad: width_CI_Estimate!!Total:!!Moved within same county: width_CI_Estimate!!Total:!!Moved from different county within same state: width_CI_Estimate!!Total:!!Moved from different state: width_CI_Estimate!!Total:!!Moved from abroad:
2012 230287 39284 76978 25095 13816 3970 9069 4077 207559.7 32753.35 62059.49 18388.33 253014.3 45814.65 91896.51 31801.67 45454.64 13061.30 29837.01 13413.33
2023 217396 37684 79927 36394 12948 5534 7485 5008 196096.5 28580.57 67614.18 28155.84 238695.5 46787.43 92239.82 44632.16 42598.92 18206.86 24625.65 16476.32

Moving locally to King County decreased from 2012 to 2023, while moving from out of state and from abroad went up.

##(b) 10 points

Present two population pyramids:

1) With combined estimates by age of individuals who lived in the same house 1 year ago and moved within the same county year ago one on one side and those who moved from a different county in the same state on the other.

# define out of the way
age_groups <- c("1-4", "5-17", "18-19", "20-24", "25-29", 
                "30-34", "35-39", "40-44", "45-49", "50-54", "55-59", 
                "60-64", "65-69", "70-74", "75+")


# Same house 1 yr ago is B07001_018 to B07001_032
same_house = kc_mig_2023 %>%
  slice(18:32) %>%
  rename(estimate_same = estimate, moe_same = moe)

# moved within same county is B07001_033 to B07001_048
moved_within = kc_mig_2023 %>%
  slice(34:48) %>%
  rename(estimate_within_county = estimate, moe_within_county = moe)

# combine these: 
kc_stayed_within_county = same_house %>%
  mutate(
    estimate = estimate_same + moved_within$estimate_within_county,
    moe = moe_same + moved_within$moe_within_county, 
    estimate = -estimate,
    moe = -moe,
    label = label) %>%
  mutate(label = age_groups) %>% 
  mutate(type = "Moved/Stayed Within King County") %>% 
  select(label, estimate, moe, type) 

# compared with this:
# moved from a different county B07001_050 to B07001_064
kc_different_county = kc_mig_2023 %>%
  slice(50:64) %>% 
  mutate(label = age_groups) %>%
  mutate(type = "Moved From a Different County") %>%
  select(label, estimate, moe, type)
kc_popplot1 = rbind(kc_stayed_within_county, kc_different_county)

kc_popplot1 = kc_popplot1 %>% 
  mutate(label = factor(label, levels = age_groups))

# kc_popplot1

ggplot(kc_popplot1, aes(x = label, y = estimate, fill = type)) +
    geom_bar(stat = "identity") +
    coord_flip() +
    theme_minimal() +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1),
      plot.title = element_text(size = 10),
      legend.title = element_blank()
    ) +
    labs(
      x = "Age Group", y = "Population",
      fill = "type", title = paste("King County Migration Population Pyramid, 2023 (Moved From Same County vs. Different County)"), sep = "") +
    scale_fill_manual(values = c("Moved/Stayed Within King County" = "steelblue", "Moved From a Different County" = "slateblue")) +
    scale_y_continuous(limits = c(-310000, 310000), breaks = seq(-300000, 300000, by = 25000), labels = scales::comma)

2) With those who moved from a different state on one side and those who moved from abroad on the other. Make sure the x-axes have the same limits for each plot.

kc_different_state <- kc_mig_2023 %>%
  slice(66:80) %>% 
  mutate(label = age_groups,
         estimate = -estimate) %>%
  mutate(type = "Moved From a Different State") %>%
  select(label, estimate, moe, type)

kc_abroad <- kc_mig_2023 %>%
  slice(82:96) %>%
  mutate(label = age_groups) %>%
  mutate(type = "Moved From Abroad") %>%
  select(label, estimate, moe, type)


kc_popplot2 = rbind(kc_different_state, kc_abroad)

kc_popplot2 = kc_popplot2 %>% 
  mutate(label = factor(label, levels = age_groups))

ggplot(kc_popplot2, aes(x = label, y = estimate, fill = type)) +
    geom_bar(stat = "identity") +
    coord_flip() +
    theme_minimal() +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1),
      plot.title = element_text(size = 10),
      legend.title = element_blank()
    ) +
    labs(
      x = "Age Group", y = "Population",
      fill = "type", title = paste("King County Migration Population Pyramid, 2023 (Moved From a Different State vs. From Abroad)"), sep = "") +
    scale_fill_manual(values = c("Moved From a Different State" = "steelblue", "Moved From Abroad" = "slateblue")) +
    scale_y_continuous(limits = c(-20000, 20000), breaks = seq(-20000, 20000, by = 2000), labels = scales::comma)

##(c) 10 points

Present four maps with Census tract-level total estimates of:

1) those who moved within the same county 2) those who moved from another county in the same state 3) those who moved from another state 4) those who moved from abroad

This answer uses code from SOC/CSSS/CSDE 533 Lab 7 provided by Jess Godwin.

same_county_values = get_acs(geography = "tract", 
                variables = "B07001_033", 
                state = "53",
                county = "033",
                geometry = TRUE)
## Getting data from the 2019-2023 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Downloading: 2.7 kB     Downloading: 2.7 kB     Downloading: 11 kB     Downloading: 11 kB     Downloading: 11 kB     Downloading: 11 kB     Downloading: 12 kB     Downloading: 12 kB     Downloading: 12 kB     Downloading: 12 kB     Downloading: 19 kB     Downloading: 19 kB     Downloading: 24 kB     Downloading: 24 kB     Downloading: 24 kB     Downloading: 24 kB     Downloading: 38 kB     Downloading: 38 kB     Downloading: 52 kB     Downloading: 52 kB     Downloading: 52 kB     Downloading: 52 kB     Downloading: 60 kB     Downloading: 60 kB     Downloading: 60 kB     Downloading: 60 kB     Downloading: 64 kB     Downloading: 64 kB     Downloading: 64 kB     Downloading: 64 kB     Downloading: 64 kB     Downloading: 64 kB     Downloading: 73 kB     Downloading: 73 kB     Downloading: 73 kB     Downloading: 73 kB     Downloading: 78 kB     Downloading: 78 kB     Downloading: 78 kB     Downloading: 78 kB     Downloading: 78 kB     Downloading: 78 kB     Downloading: 86 kB     Downloading: 86 kB     Downloading: 86 kB     Downloading: 86 kB     Downloading: 90 kB     Downloading: 90 kB     Downloading: 99 kB     Downloading: 99 kB     Downloading: 99 kB     Downloading: 99 kB     Downloading: 110 kB     Downloading: 110 kB     Downloading: 110 kB     Downloading: 110 kB     Downloading: 130 kB     Downloading: 130 kB     Downloading: 140 kB     Downloading: 140 kB     Downloading: 140 kB     Downloading: 140 kB     Downloading: 150 kB     Downloading: 150 kB     Downloading: 150 kB     Downloading: 150 kB     Downloading: 160 kB     Downloading: 160 kB     Downloading: 170 kB     Downloading: 170 kB     Downloading: 170 kB     Downloading: 170 kB     Downloading: 170 kB     Downloading: 170 kB     Downloading: 180 kB     Downloading: 180 kB     Downloading: 180 kB     Downloading: 180 kB     Downloading: 180 kB     Downloading: 180 kB     Downloading: 190 kB     Downloading: 190 kB     Downloading: 190 kB     Downloading: 190 kB     Downloading: 200 kB     Downloading: 200 kB     Downloading: 210 kB     Downloading: 210 kB     Downloading: 210 kB     Downloading: 210 kB     Downloading: 210 kB     Downloading: 210 kB     Downloading: 210 kB     Downloading: 210 kB     Downloading: 210 kB     Downloading: 210 kB     Downloading: 210 kB     Downloading: 210 kB     Downloading: 230 kB     Downloading: 230 kB     Downloading: 240 kB     Downloading: 240 kB     Downloading: 240 kB     Downloading: 240 kB     Downloading: 240 kB     Downloading: 240 kB     Downloading: 260 kB     Downloading: 260 kB     Downloading: 260 kB     Downloading: 260 kB     Downloading: 260 kB     Downloading: 260 kB     Downloading: 260 kB     Downloading: 260 kB     Downloading: 260 kB     Downloading: 260 kB     Downloading: 270 kB     Downloading: 270 kB     Downloading: 270 kB     Downloading: 270 kB     Downloading: 270 kB     Downloading: 270 kB     Downloading: 270 kB     Downloading: 270 kB     Downloading: 270 kB     Downloading: 270 kB     Downloading: 270 kB     Downloading: 270 kB     Downloading: 270 kB     Downloading: 270 kB     Downloading: 290 kB     Downloading: 290 kB     Downloading: 290 kB     Downloading: 290 kB     Downloading: 290 kB     Downloading: 290 kB     Downloading: 310 kB     Downloading: 310 kB     Downloading: 310 kB     Downloading: 310 kB     Downloading: 310 kB     Downloading: 310 kB     Downloading: 310 kB     Downloading: 310 kB     Downloading: 320 kB     Downloading: 320 kB     Downloading: 320 kB     Downloading: 320 kB     Downloading: 320 kB     Downloading: 320 kB     Downloading: 320 kB     Downloading: 320 kB     Downloading: 320 kB     Downloading: 320 kB     Downloading: 320 kB     Downloading: 320 kB     Downloading: 320 kB     Downloading: 320 kB     Downloading: 320 kB     Downloading: 320 kB     Downloading: 320 kB     Downloading: 320 kB     Downloading: 340 kB     Downloading: 340 kB     Downloading: 360 kB     Downloading: 360 kB     Downloading: 360 kB     Downloading: 360 kB     Downloading: 360 kB     Downloading: 360 kB     Downloading: 360 kB     Downloading: 360 kB     Downloading: 360 kB     Downloading: 360 kB     Downloading: 370 kB     Downloading: 370 kB     Downloading: 390 kB     Downloading: 390 kB     Downloading: 390 kB     Downloading: 390 kB     Downloading: 390 kB     Downloading: 390 kB     Downloading: 390 kB     Downloading: 390 kB     Downloading: 410 kB     Downloading: 410 kB     Downloading: 410 kB     Downloading: 410 kB     Downloading: 410 kB     Downloading: 410 kB     Downloading: 410 kB     Downloading: 410 kB     Downloading: 410 kB     Downloading: 410 kB     Downloading: 410 kB     Downloading: 410 kB     Downloading: 420 kB     Downloading: 420 kB     Downloading: 440 kB     Downloading: 440 kB     Downloading: 440 kB     Downloading: 440 kB     Downloading: 440 kB     Downloading: 440 kB     Downloading: 440 kB     Downloading: 440 kB     Downloading: 440 kB     Downloading: 440 kB     Downloading: 450 kB     Downloading: 450 kB     Downloading: 470 kB     Downloading: 470 kB     Downloading: 470 kB     Downloading: 470 kB     Downloading: 470 kB     Downloading: 470 kB     Downloading: 470 kB     Downloading: 470 kB     Downloading: 470 kB     Downloading: 470 kB     Downloading: 470 kB     Downloading: 470 kB     Downloading: 470 kB     Downloading: 470 kB     Downloading: 490 kB     Downloading: 490 kB     Downloading: 490 kB     Downloading: 490 kB     Downloading: 490 kB     Downloading: 490 kB     Downloading: 490 kB     Downloading: 490 kB     Downloading: 490 kB     Downloading: 490 kB     Downloading: 500 kB     Downloading: 500 kB     Downloading: 500 kB     Downloading: 500 kB     Downloading: 500 kB     Downloading: 500 kB     Downloading: 520 kB     Downloading: 520 kB     Downloading: 540 kB     Downloading: 540 kB     Downloading: 540 kB     Downloading: 540 kB     Downloading: 540 kB     Downloading: 540 kB     Downloading: 540 kB     Downloading: 540 kB     Downloading: 540 kB     Downloading: 540 kB     Downloading: 540 kB     Downloading: 540 kB     Downloading: 540 kB     Downloading: 540 kB     Downloading: 540 kB     Downloading: 540 kB     Downloading: 550 kB     Downloading: 550 kB     Downloading: 550 kB     Downloading: 550 kB     Downloading: 550 kB     Downloading: 550 kB     Downloading: 550 kB     Downloading: 550 kB     Downloading: 570 kB     Downloading: 570 kB     Downloading: 570 kB     Downloading: 570 kB     Downloading: 570 kB     Downloading: 570 kB     Downloading: 590 kB     Downloading: 590 kB     Downloading: 590 kB     Downloading: 590 kB     Downloading: 590 kB     Downloading: 590 kB     Downloading: 600 kB     Downloading: 600 kB     Downloading: 600 kB     Downloading: 600 kB     Downloading: 600 kB     Downloading: 600 kB     Downloading: 600 kB     Downloading: 600 kB     Downloading: 600 kB     Downloading: 600 kB     Downloading: 620 kB     Downloading: 620 kB     Downloading: 630 kB     Downloading: 630 kB     Downloading: 630 kB     Downloading: 630 kB     Downloading: 630 kB     Downloading: 630 kB     Downloading: 650 kB     Downloading: 650 kB     Downloading: 650 kB     Downloading: 650 kB     Downloading: 670 kB     Downloading: 670 kB     Downloading: 670 kB     Downloading: 670 kB     Downloading: 670 kB     Downloading: 670 kB     Downloading: 670 kB     Downloading: 670 kB     Downloading: 670 kB     Downloading: 670 kB     Downloading: 670 kB     Downloading: 670 kB     Downloading: 680 kB     Downloading: 680 kB     Downloading: 680 kB     Downloading: 680 kB     Downloading: 680 kB     Downloading: 680 kB     Downloading: 680 kB     Downloading: 680 kB     Downloading: 680 kB     Downloading: 680 kB     Downloading: 680 kB     Downloading: 680 kB     Downloading: 680 kB     Downloading: 680 kB     Downloading: 680 kB     Downloading: 680 kB     Downloading: 700 kB     Downloading: 700 kB     Downloading: 700 kB     Downloading: 700 kB     Downloading: 700 kB     Downloading: 700 kB     Downloading: 720 kB     Downloading: 720 kB     Downloading: 730 kB     Downloading: 730 kB     Downloading: 730 kB     Downloading: 730 kB     Downloading: 730 kB     Downloading: 730 kB     Downloading: 730 kB     Downloading: 730 kB     Downloading: 730 kB     Downloading: 730 kB     Downloading: 730 kB     Downloading: 730 kB     Downloading: 750 kB     Downloading: 750 kB     Downloading: 750 kB     Downloading: 750 kB     Downloading: 750 kB     Downloading: 750 kB     Downloading: 770 kB     Downloading: 770 kB     Downloading: 780 kB     Downloading: 780 kB     Downloading: 800 kB     Downloading: 800 kB     Downloading: 800 kB     Downloading: 800 kB     Downloading: 800 kB     Downloading: 800 kB     Downloading: 800 kB     Downloading: 800 kB     Downloading: 810 kB     Downloading: 810 kB     Downloading: 810 kB     Downloading: 810 kB     Downloading: 810 kB     Downloading: 810 kB     Downloading: 810 kB     Downloading: 810 kB     Downloading: 830 kB     Downloading: 830 kB     Downloading: 830 kB     Downloading: 830 kB     Downloading: 830 kB     Downloading: 830 kB     Downloading: 830 kB     Downloading: 830 kB     Downloading: 850 kB     Downloading: 850 kB     Downloading: 850 kB     Downloading: 850 kB     Downloading: 850 kB     Downloading: 850 kB     Downloading: 860 kB     Downloading: 860 kB     Downloading: 860 kB     Downloading: 860 kB     Downloading: 860 kB     Downloading: 860 kB     Downloading: 860 kB     Downloading: 860 kB     Downloading: 860 kB     Downloading: 860 kB     Downloading: 860 kB     Downloading: 860 kB     Downloading: 860 kB     Downloading: 860 kB     Downloading: 880 kB     Downloading: 880 kB     Downloading: 880 kB     Downloading: 880 kB     Downloading: 880 kB     Downloading: 880 kB     Downloading: 880 kB     Downloading: 880 kB     Downloading: 900 kB     Downloading: 900 kB     Downloading: 900 kB     Downloading: 900 kB     Downloading: 900 kB     Downloading: 900 kB     Downloading: 900 kB     Downloading: 900 kB     Downloading: 910 kB     Downloading: 910 kB     Downloading: 910 kB     Downloading: 910 kB     Downloading: 910 kB     Downloading: 910 kB     Downloading: 930 kB     Downloading: 930 kB     Downloading: 930 kB     Downloading: 930 kB     Downloading: 930 kB     Downloading: 930 kB     Downloading: 930 kB     Downloading: 930 kB     Downloading: 930 kB     Downloading: 930 kB     Downloading: 950 kB     Downloading: 950 kB     Downloading: 950 kB     Downloading: 950 kB     Downloading: 950 kB     Downloading: 950 kB     Downloading: 960 kB     Downloading: 960 kB     Downloading: 960 kB     Downloading: 960 kB     Downloading: 960 kB     Downloading: 960 kB     Downloading: 960 kB     Downloading: 960 kB     Downloading: 960 kB     Downloading: 960 kB     Downloading: 980 kB     Downloading: 980 kB     Downloading: 980 kB     Downloading: 980 kB     Downloading: 990 kB     Downloading: 990 kB     Downloading: 1 MB     Downloading: 1 MB     Downloading: 1 MB     Downloading: 1 MB     Downloading: 1 MB     Downloading: 1 MB     Downloading: 1 MB     Downloading: 1 MB     Downloading: 1 MB     Downloading: 1 MB     Downloading: 1 MB     Downloading: 1 MB     Downloading: 1 MB     Downloading: 1 MB     Downloading: 1 MB     Downloading: 1 MB     Downloading: 1 MB     Downloading: 1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.1 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.2 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.3 MB     Downloading: 1.4 MB     Downloading: 1.4 MB     Downloading: 1.4 MB     Downloading: 1.4 MB     Downloading: 1.4 MB     Downloading: 1.4 MB     Downloading: 1.4 MB     Downloading: 1.4 MB     Downloading: 1.4 MB     Downloading: 1.4 MB     Downloading: 1.4 MB     Downloading: 1.4 MB     Downloading: 1.4 MB     Downloading: 1.4 MB
diff_county_values = get_acs(geography = "tract", 
                variables = "B07001_049", 
                state = "53",
                county = "033",
                geometry = TRUE)
## Getting data from the 2019-2023 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
diff_state_values = get_acs(geography = "tract", 
                variables = "B07001_065", 
                state = "53",
                county = "033",
                geometry = TRUE)
## Getting data from the 2019-2023 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
diff_country_values = get_acs(geography = "tract", 
                variables = "B07001_081", 
                state = "53",
                county = "033",
                geometry = TRUE)
## Getting data from the 2019-2023 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
same_county_values %>% 
  ggplot() + 
  geom_sf(aes(fill = estimate), color = "white") +
  scale_fill_viridis_c(option = "plasma", name = "Estimates", limits = c(0, 1600)) +  
  theme_void() +
  ggtitle("Migration Within the County (King County, 2023)") +
  theme(plot.title = element_text(size = 14, face = "bold", hjust = 0.5))

diff_county_values %>% 
  ggplot() + 
  geom_sf(aes(fill = estimate), color = "white") +
  scale_fill_viridis_c(option = "plasma", name = "Estimates", limits = c(0, 1600)) +  
  theme_void() +
  ggtitle("Migration From a Different County Within WA (King County, 2023)") +
  theme(plot.title = element_text(size = 14, face = "bold", hjust = 0.5))

diff_state_values %>% 
  ggplot() + 
  geom_sf(aes(fill = estimate), color = "white") +
  scale_fill_viridis_c(option = "plasma", name = "Estimates", limits = c(0, 1600)) +  
  theme_void() +
  ggtitle("Migration From a Different State (King County, 2023)") +
  theme(plot.title = element_text(size = 14, face = "bold", hjust = 0.5))

diff_country_values %>% 
  ggplot() + 
  geom_sf(aes(fill = estimate), color = "white") +
  scale_fill_viridis_c(option = "plasma", name = "Estimates", limits = c(0, 1600)) +  
  theme_void() +
  ggtitle("Migration From a Different Country (King County, 2023)") +
  theme(plot.title = element_text(size = 14, face = "bold", hjust = 0.5))

Make sure each map has the same coloring for its legend as the others.

##(d) 5 points

Construct the 90% confidence intervals for each of the variables and tracts in part (c), estimate \(±\) moe. Take the average across tracts for each variable of the width of the confidence intervals. Compare that to the length of the widths of the confidence intervals in part (a). How do confidence interval widths compare between County-level, 1-year estimates and tract-level, 5-year estimates? Which variable has the largest width?

The County-level, 1-year estimate confidence interval widths are:

 Stayed in the same county: 42598.92  Moved from different county within same state: 18206.86  Moved from different state: 24625.65
 Moved from abroad: 16476.32

The Tract-level, 5 year estimate CI widths are:  Stayed in the same county: 812.5901  Moved from different county within same state: 234.9725  Moved from different state: 385.9801  Moved from abroad: 210.2277

The variable with the largest CI width in both county-level and tract-level data is “Stayed in the same county.” The second-highest in both is “Moved from a different state”.

Comparing CI widths from county-level vs. tract-level, the county-level 90% CI widths are all much larger, on an order of ten times larger, than the tract-level 90% CI widths. This could be because the tract level data is much more granular, and thus any extrema in margins of error are smoothed out more easily when we take the mean.

same_county_values_CI = get_acs(geography = "tract", 
                variables = "B07001_033", 
                state = "53",
                county = "033")
## Getting data from the 2019-2023 5-year ACS
diff_county_values_CI = get_acs(geography = "tract", 
                variables = "B07001_049", 
                state = "53",
                county = "033")
## Getting data from the 2019-2023 5-year ACS
diff_state_values_CI = get_acs(geography = "tract", 
                variables = "B07001_065", 
                state = "53",
                county = "033")
## Getting data from the 2019-2023 5-year ACS
diff_country_values_CI = get_acs(geography = "tract", 
                variables = "B07001_081", 
                state = "53",
                county = "033")
## Getting data from the 2019-2023 5-year ACS
same_county_values_CI = same_county_values_CI %>% 
  mutate(
    lower_CI = estimate - 1.645 * moe,
    upper_CI = estimate + 1.645 * moe,
    lower_CI = as.numeric(lower_CI),
    upper_CI = as.numeric(upper_CI),
    width_CI = upper_CI - lower_CI)

diff_county_values_CI = diff_county_values_CI %>% 
  mutate(
    lower_CI = estimate - 1.645 * moe,
    upper_CI = estimate + 1.645 * moe,
    lower_CI = as.numeric(lower_CI),
    upper_CI = as.numeric(upper_CI),
    width_CI = upper_CI - lower_CI)

diff_state_values_CI = diff_state_values_CI %>% 
  mutate(
    lower_CI = estimate - 1.645 * moe,
    upper_CI = estimate + 1.645 * moe,
    lower_CI = as.numeric(lower_CI),
    upper_CI = as.numeric(upper_CI),
    width_CI = upper_CI - lower_CI)

diff_country_values_CI = diff_country_values_CI %>% 
  mutate(
    lower_CI = estimate - 1.645 * moe,
    upper_CI = estimate + 1.645 * moe,
    lower_CI = as.numeric(lower_CI),
    upper_CI = as.numeric(upper_CI),
    width_CI = upper_CI - lower_CI) 

average_ci_width_033 <- mean(same_county_values_CI$width_CI, na.rm = TRUE)
average_ci_width_049 <- mean(diff_county_values_CI$width_CI, na.rm = TRUE)
average_ci_width_065 <- mean(diff_state_values_CI$width_CI, na.rm = TRUE)
average_ci_width_081 <- mean(diff_country_values_CI$width_CI, na.rm = TRUE)

##(e) 5 points

Are the measures you’ve summarized in this problem migrant stocks or migrant flows? Explain your answer.

I believe the estimates of individuals moving to King County in 2023 (and their places of origin) represent migrant stocks, not migrant flows. Migrant stocks are datasets measuring the total number of people in a specific location (King County) at a specific time (the year 2023).

Since the ACS data records individuals who have been in King County for at least a year, and records where they were 1 year prior, it does not record changes in location over a time period, just where they were one year prior to the day they ask “Where did you live 1 year ago?”.

Even if the ACS data were in 5-year aggregate estimates as in later parts of the problem, the data still answers the same question as before. It doesn’t capture what migrant flows capture, which is whether the same individuals are moving multiple times in a given time period.

Problem 2: Construct a Demographic Profile of a Population of Your Choice - 60 points

##(a) 10 points

Select a nation whose demographic measures you’d like to explore. Of the data sources covered in this class (e.g. HMD, HCoD, HFD, WPP 2024, DHS, ACS, etc.), where can you find information for this country? For data sources beyond the WPP 2024, what added benefits do other data sources provide for this country?

- Country: Sweden; I chose Sweden because it is well-represented in the HMD and HFD; Scotland, for example, has data integrity difficulties marked in a warning on the HMD page. - Location Code: 752

- The HMD has 1x1 life tables from 1751 - 2023. - The HFD has age-specific fertility rates from 1891 - 2023. - The HMD has 1x1 population size for females, males, and total, from 1751 - 2024. - The UN Department of Economic and Social Affairs has migrant stocks for every five years since 1990, and 2024. The added benefit is that the HMD and HFD doesn’t seem to have this data. - The UN Department of Economic and Social Affairs also has migrant flows for the years 1980 - 2013

Just out of curiosity, not to be used in this homework, I found inflow migration data for the years 2012 - 2022, the OECD reports in thousands for Sweden:

2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 82.6, 95.4, 106.1, 113.9, 143.0, 125.0, 114.4, 98.2, 65.8, 74.4, 86.5

Since the final year with all data is 2023, that is the year I will choose to project out to 2100.

fertility_lines = readLines("~/Documents/CSDE 533 Homeworks/sweden_fertilityrates.rtf")

#skip unnecessary lines
fertility_data_lines = fertility_lines[13:length(fertility_lines)]

# get rid of '\\' and white space
fertility_cleaned_lines = gsub("\\\\", "", fertility_data_lines)

hfd_sweden = data.frame(Year = integer(), Age = character(), ASFR = numeric(), stringsAsFactors = FALSE)

for (line in fertility_cleaned_lines) {
  parts <- unlist(strsplit(line, "\\s+"))  # Split by whitespace
  
  # Append to dataframe
  hfd_sweden = rbind(hfd_sweden, data.frame(
    Year = as.integer(parts[1]), 
    Age = parts[2], 
    ASFR = as.numeric(parts[3]),
    stringsAsFactors = FALSE
  ))
}

hfd_sweden_raw <- hfd_sweden %>%
  filter(Year >= 1950 & Year <= 2023)

hfd_sweden_raw[is.na(hfd_sweden_raw)] = 0
female_lifetable_lines = readLines("~/Documents/CSDE 533 Homeworks/sweden_female_lifetable.rtf")

#skip unnecessary lines
female_lifetable_data_lines = female_lifetable_lines[13:length(female_lifetable_lines)]

# get rid of '\\' and white space
female_lifetable_cleaned_lines = gsub("\\\\", "", female_lifetable_data_lines)

female_lifetable_split_lines = female_lifetable_cleaned_lines %>% 
  strsplit("\\s+") %>% 
  lapply(function(x) x[-1])

female_lifetable_sweden = data.frame(Year = integer(), Age = character(), mx = numeric(), qx = numeric(), ax = numeric(), lx = numeric(), dx = numeric(), Lx = numeric(), Tx = numeric(), ex = numeric())

for (line in female_lifetable_split_lines) {
  female_lifetable_sweden = rbind(female_lifetable_sweden, data.frame(
                                       Year = as.integer(line[1]), 
                                       Age = line[2], 
                                       mx = as.numeric(line[3]),
                                       qx = as.numeric(line[4]),
                                       ax = as.numeric(line[5]),
                                       lx = as.numeric(line[6]),
                                       dx = as.numeric(line[7]),
                                       Lx = as.numeric(line[8]),
                                       Tx = as.numeric(line[9]),
                                       ex = as.numeric(line[10])))
}

female_lifetable_sweden_raw <- female_lifetable_sweden %>%
  filter(Year >= 1950 & Year <= 2023) %>% 
  rename(Female_mx = mx,Female_qx = qx, Female_ax = ax, Female_lx = lx, Female_dx = dx, Female_Lx = Lx, Female_Tx = Tx, Female_ex = ex)

female_lifetable_sweden_raw[is.na(female_lifetable_sweden_raw)] <- 0
lifetable_lines = readLines("~/Documents/CSDE 533 Homeworks/sweden_total_lifetable.rtf")

#skip unnecessary lines
lifetable_data_lines = lifetable_lines[13:length(lifetable_lines)]

# get rid of '\\' and white space
lifetable_cleaned_lines = gsub("\\\\", "", lifetable_data_lines)

lifetable_split_lines = lifetable_cleaned_lines %>% 
  strsplit("\\s+") %>% 
  lapply(function(x) x[-1])

lifetable_sweden = data.frame(Year = integer(), Age = character(), mx = numeric(), qx = numeric(), ax = numeric(), lx = numeric(), dx = numeric(), Lx = numeric(), Tx = numeric(), ex = numeric())

for (line in lifetable_split_lines) {
  lifetable_sweden = rbind(lifetable_sweden, data.frame(
                                       Year = as.integer(line[1]), 
                                       Age = line[2], 
                                       mx = as.numeric(line[3]),
                                       qx = as.numeric(line[4]),
                                       ax = as.numeric(line[5]),
                                       lx = as.numeric(line[6]),
                                       dx = as.numeric(line[7]),
                                       Lx = as.numeric(line[8]),
                                       Tx = as.numeric(line[9]),
                                       ex = as.numeric(line[10])))
}

lifetable_sweden_raw <- lifetable_sweden %>%
  filter(Year >= 1950 & Year <= 2023)

lifetable_sweden_raw[is.na(lifetable_sweden_raw)] <- 0
pop_lines = readLines("~/Documents/CSDE 533 Homeworks/sweden_population.rtf")

#skip unnecessary lines
pop_data_lines = pop_lines[13:length(pop_lines)]

# get rid of '\\' and white space
pop_cleaned_lines = gsub("\\\\", "", pop_data_lines)

pop_split_lines = pop_cleaned_lines %>% 
  strsplit("\\s+") %>% 
  lapply(function(x) x[-1])

pop_sweden = data.frame(Year = integer(), Age = character(), Female = numeric(), Male = numeric(), Total = numeric())

for (line in pop_split_lines) {
  pop_sweden = rbind(pop_sweden, data.frame(Year = as.integer(line[1]), 
                                       Age = line[2], 
                                       Female = as.numeric(line[3]),
                                       Male = as.numeric(line[4]),
                                       Total = as.numeric(line[5])))
}

pop_sweden_raw <- pop_sweden %>%
  filter(Year >= 1950 & Year <= 2023) %>% 
  rename(Female_Pop = Female, Male_Pop = Male, Total_Pop = Total)

pop_sweden_raw[is.na(pop_sweden_raw)] <- 0

##(b) 20 points

Plot the estimates and projections of population size to 2100, plot life expectancy, plot total fertility rate, and plot the net migration for your country.

Data Cleaning

# clean HFD data for 2023
hfd_sweden_2023 = hfd_sweden_raw %>% 
  filter(Year == 2023)

hfd_sweden_2023$Age[1] = 12
hfd_sweden_2023$Age[nrow(hfd_sweden_2023)] = 55

hfd_sweden_2023 = hfd_sweden_2023 %>% 
  mutate(Age = as.numeric(Age)) %>% 
  mutate(x_half = Age + 0.5) %>% 
  select(-Year)

# clean lifetable data for 2023
female_lifetable_sweden_2023 = female_lifetable_sweden_raw %>%
  filter(Year == 2023) %>% 
  mutate(Age = as.numeric(Age)) %>% 
  select(-Year)


# join data for fertility calculations and clean
fertility_metrics_2023 = female_lifetable_sweden_2023 %>% 
  left_join(hfd_sweden_2023, by = "Age") 

fertility_metrics_2023[is.na(fertility_metrics_2023)] = 0

Population Estimates and Projections to 2100

More Data Cleaning

# Merge population and fertility data based on Year and Age
sweden_data <- left_join(pop_sweden_raw, hfd_sweden_raw,  by = c("Year", "Age"))
sweden_data[is.na(sweden_data)] = 0

# Merge life table data for Lx values
sweden_data <- left_join(sweden_data, female_lifetable_sweden_raw, by = c("Year", "Age"))
sweden_data[is.na(sweden_data)] = 0

sweden_data_2023 = sweden_data %>%
  filter(Year == 2023)

sweden_data_2023$survivorship = NA

          # first row has no preceding age group
for (i in 2:nrow(sweden_data_2023)) {
    #(current age group's Lx) / (previous age group's Lx)
    sweden_data_2023$survivorship[i] = sweden_data_2023$Female_Lx[i] / sweden_data_2023$Female_Lx[i - 1]
  }

sweden_data_2023$survivorship[1] = sweden_data_2023$Female_Lx[1] / sweden_data_2023$Female_lx[1]

# hard-code before we combine age group 109 and 110+ 
# since no one survives open interval, must put T_{x+n}/T_{x} instead
sweden_data_2023$survivorship[nrow(sweden_data_2023)] = 0

show_survivorship_sweden_data_2000 = sweden_data_2023 %>% 
  select(Age, survivorship) 

kable(show_survivorship_sweden_data_2000,
      col.names = c("Age", "Survivorship"),
      caption = "Sanity Checking Survivorship") %>%
  kable_styling(font_size = 15,
                bootstrap_options = c("striped", "condensed")) %>%
  scroll_box(width = "300px", height = "600px")
Sanity Checking Survivorship
Age Survivorship
0 0.9985300
1 0.9996795
2 0.9998998
3 0.9998798
4 0.9998497
5 0.9999098
6 0.9999699
7 0.9999599
8 0.9999198
9 0.9999298
10 0.9999599
11 0.9999298
12 0.9999198
13 0.9999198
14 0.9999098
15 0.9998797
16 0.9998897
17 0.9998495
18 0.9997994
19 0.9997893
20 0.9997391
21 0.9997189
22 0.9996887
23 0.9996886
24 0.9997488
25 0.9997487
26 0.9997889
27 0.9997989
28 0.9997586
29 0.9997586
30 0.9997384
31 0.9996981
32 0.9996174
33 0.9996173
34 0.9996272
35 0.9995162
36 0.9995664
37 0.9996267
38 0.9995862
39 0.9994952
40 0.9994747
41 0.9994340
42 0.9993528
43 0.9993119
44 0.9993722
45 0.9993819
46 0.9992903
47 0.9991579
48 0.9990049
49 0.9988107
50 0.9986465
51 0.9984816
52 0.9983568
53 0.9982212
54 0.9980849
55 0.9981223
56 0.9979132
57 0.9974143
58 0.9969842
59 0.9966021
60 0.9965178
61 0.9962344
62 0.9960631
63 0.9955745
64 0.9946468
65 0.9939705
66 0.9935067
67 0.9925398
68 0.9914765
69 0.9909226
70 0.9902772
71 0.9895027
72 0.9878839
73 0.9865624
74 0.9849134
75 0.9824672
76 0.9807110
77 0.9779597
78 0.9751869
79 0.9723491
80 0.9692148
81 0.9659228
82 0.9604192
83 0.9540614
84 0.9475527
85 0.9403250
86 0.9321961
87 0.9217784
88 0.9111661
89 0.8999189
90 0.8841610
91 0.8666979
92 0.8468311
93 0.8282851
94 0.8076853
95 0.7841113
96 0.7641671
97 0.7399813
98 0.7150449
99 0.6896125
100 0.6638440
101 0.6381910
102 0.6123561
103 0.5885262
104 0.5630252
105 0.5432836
106 0.5164835
107 0.5000000
108 0.4893617
109 0.4782609
110+ 0.0000000
sweden_data_2023$Female_ASFR = NA

for (i in 1:nrow(sweden_data_2023)) {
      sweden_data_2023$Female_ASFR[i] = sweden_data_2023$ASFR[i]*(1/(1+1.05))
  }

# remove unnecessary columns for brevity
sweden_data_2023F = sweden_data_2023 %>% 
  select(-Year, -Male_Pop, -Total_Pop, -ASFR, -Female_ax, Female_dx, -Female_ex)

sweden_data_2023F$survivorship[nrow(sweden_data_2023F)] = 0  

# last two rows (109 and 110+ age group)
last_two_rows = tail(sweden_data_2023F, 2)

# Sum the values for numerical columns
summed_row = last_two_rows %>%
  summarise(
    Age = "109",
    Female_Pop = sum(Female_Pop, na.rm = TRUE),
    Female_mx = sum(Female_Pop, na.rm = TRUE),
    Female_qx = sum(Female_qx, na.rm = TRUE),
    Female_lx = sum(Female_lx, na.rm = TRUE),
    Female_dx = sum(Female_lx, na.rm = TRUE),
    Female_Lx = sum(Female_Lx, na.rm = TRUE),
    Female_Tx = sum(Female_Tx, na.rm = TRUE),
    survivorship = sum(survivorship, na.rm = TRUE),
    Female_ASFR = sum(Female_ASFR, na.rm = TRUE)

  )

sweden_data_2023F = sweden_data_2023F %>%
  slice(1:(n() - 2)) %>%
  bind_rows(summed_row)

Creating Leslie matrix

# leslie matrix dimensions: same number of rows and columns as there are age groups
num_age_groups = length(unique(sweden_data_2023F$Age))

sweden_leslie_matrix = matrix(NA, nrow = num_age_groups, ncol = num_age_groups)

survivorship_values = sweden_data_2023F$survivorship[1:num_age_groups]

# Place the survivorship values on the subdiagonal (starting from row 2, column 1)
for (i in 1:length(survivorship_values) - 1) {
  sweden_leslie_matrix[i + 1, i] <- survivorship_values[i]
}

sweden_leslie_matrix[is.na(sweden_leslie_matrix)] = 0

top_5x5 <- sweden_leslie_matrix[1:5, 1:5]
print("First few rows and columns of the Leslie Matrix")
## [1] "First few rows and columns of the Leslie Matrix"
print(top_5x5)
##         [,1]      [,2]      [,3]      [,4] [,5]
## [1,] 0.00000 0.0000000 0.0000000 0.0000000    0
## [2,] 0.99853 0.0000000 0.0000000 0.0000000    0
## [3,] 0.00000 0.9996795 0.0000000 0.0000000    0
## [4,] 0.00000 0.0000000 0.9998998 0.0000000    0
## [5,] 0.00000 0.0000000 0.0000000 0.9998798    0
penult_index = nrow(sweden_data_2023F) -1
final_index = nrow(sweden_data_2023F)

T_x_penult <- sweden_data_2023F$Female_Tx[penult_index]  # T_x
T_x_final <- sweden_data_2023F$Female_Tx[final_index]  # T_{x+n}

open_interval_scalar = T_x_final/T_x_penult

# Insert the computed value into the last row/last column of the Leslie matrix
sweden_leslie_matrix[final_index, ncol(sweden_leslie_matrix)] <- open_interval_scalar

tail_5x5 <- sweden_leslie_matrix[(nrow(sweden_leslie_matrix)-4):nrow(sweden_leslie_matrix), 
                                  (ncol(sweden_leslie_matrix)-4):ncol(sweden_leslie_matrix)]
print("Final few rows and columns of the Leslie Matrix")
## [1] "Final few rows and columns of the Leslie Matrix"
print(tail_5x5)
##           [,1]      [,2] [,3]      [,4]      [,5]
## [1,] 0.0000000 0.0000000  0.0 0.0000000 0.0000000
## [2,] 0.5432836 0.0000000  0.0 0.0000000 0.0000000
## [3,] 0.0000000 0.5164835  0.0 0.0000000 0.0000000
## [4,] 0.0000000 0.0000000  0.5 0.0000000 0.0000000
## [5,] 0.0000000 0.0000000  0.0 0.4893617 0.6428571
n = 1

sweden_data_2023F <- sweden_data_2023F %>%
   mutate(
    nLx_prev = lag(Female_Lx, default = NA),   # nL_{(x - n)}
    nFx_prev = lag(Female_Pop, default = NA), # nN_{(x - n)}^F
    term1 = Female_ASFR * n * (1/ (2)),
    term2 = Female_ASFR * n * ((1 * (Female_Lx / nLx_prev)) / (2)),
    # gets rid of NAs
    nBx = ifelse(is.na(nLx_prev) | is.na(nFx_prev), 0, (term1 + term2)*sweden_data_2023F$survivorship[1])  
  )

sweden_data_2023F[is.na(sweden_data_2023F)] = 0

fertility_row <- sweden_data_2023F$nBx
sweden_leslie_matrix[1, ] <- fertility_row
project_population = function(matrix, input_data, years_list) {
  # Convert the input population data to a matrix to avoid errors
  n_vector <- as.matrix(input_data)
  num_age_groups <- nrow(matrix)
  
  # Check if the population vector length matches the number of rows in the Leslie matrix
  if (length(n_vector) != num_age_groups) {
    stop("Population vector length must match the number of rows in the Leslie matrix.")
  }
  
  # Initialize the dataframe to hold the projections
  sweden_projection <- data.frame(Age = sweden_data_2023F$Age)
  
  # Loop through each year in the years_list and calculate the projections
  for (years in years_list) {
    temp_vector <- n_vector
    for (i in 1:years) {
      temp_vector <- matrix %*% temp_vector
    }
    sweden_projection[[paste0("Projection_", years, "_years")]] <- as.numeric(temp_vector)
  }
  
  # Return the dataframe with the projections
  return(sweden_projection)
}

# Define the list of years for projections (1 to 77 years)
years_to_project = 1:77 

# Call the function with the Leslie matrix, the initial population (Female_Pop in this case), and the years to project
sweden_population_projections = project_population(sweden_leslie_matrix, sweden_data_2023F$Female_Pop, years_to_project)

# Calculate the total population for each projected year (sum of the entire Projection_X_years column)
# We will sum each of the Projection_X_years columns (Projection_1_years, Projection_2_years, ..., Projection_77_years)
total_population_by_year = sapply(1:77, function(i) sum(sweden_population_projections[[paste0("Projection_", i, "_years")]], na.rm = TRUE))

# Create a dataframe for plotting the total population projections
total_population_df = data.frame(Year = 2023 + years_to_project, Total_Population = total_population_by_year)

# View the resulting dataframe
total_population_df = total_population_df %>% 
  mutate(Type = "Projected Population")
sweden_annual_populations <- data.frame(Year = integer(), Total_Population = numeric())

for (year in 1950:2023) {
  
  pop_sweden_year <- pop_sweden_raw %>%
    filter(Year == year)
  
  pop_sweden_year$Age[nrow(pop_sweden_year)] <- 110
  
  pop_sweden_year <- pop_sweden_year %>%
    mutate(Age = as.numeric(Age)) %>% 
    mutate(type = "Observed Population")
  
  total_population <- sum(pop_sweden_year$Female_Pop, na.rm = TRUE)
  
  sweden_annual_populations <- bind_rows(sweden_annual_populations, 
                                         data.frame(Year = year, Total_Population = total_population, Type = "Observed Population"))
}

sweden_annual_populations = rbind(sweden_annual_populations, total_population_df)
ggplot(sweden_annual_populations, aes(x = Year, y = Total_Population, color = Type)) +
  geom_line(linewidth = 1) + 
  geom_point(size = 0.85) +
  scale_x_continuous(
    breaks = seq(1950, 2100, by = 10)
  ) +
  scale_y_continuous(
    limits = c(2500000, 6000000),
    breaks = seq(0, 6000000, by = 500000),
    labels = scales::comma
  ) +
  labs(title = "Observed and Projected Total Population of Sweden (1950-2100)",
       x = "Year",
       y = "Female Population") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 10),
    legend.title = element_blank(),
    panel.grid.major = element_blank()
  ) +
  scale_color_manual(values = c("Observed Population" = "slateblue", "Projected Population" = "steelblue"))

The projected population is expected to shrink back to 20th-century levels by 2100. This could be becuase of the high proportion of older age groups in Sweden as of 2023. This is not the trajectory of a stable population, and it is also unlikely that beyond 2100 the population of Sweden should dip below that of 1950 Sweden.

Total Life Expectancy

for all age groups in 2023, Plotted

lifetable_sweden_2023 = lifetable_sweden_raw %>%
  filter(Year == 2023) %>% 
  select(-Year)

lifetable_sweden_2023$Age[nrow(lifetable_sweden_2023)] = 110

lifetable_sweden_2023 = lifetable_sweden_2023 %>%
  mutate(Age = as.numeric(Age))

ggplot(lifetable_sweden_2023, aes(x = Age, y = ex)) +
  geom_line(color = "orchid4", linewidth = 1) +
  geom_point(color = "orchid4", size = 0.85) +
  scale_x_continuous(
    breaks = seq(0, 110, by = 5)
  ) +
  labs(title = "Sweden's Period Further Life Expected for Total Population in 2023",
       x = "Year",
       y = "Life Expectancy (ex)") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 10),
    legend.title = element_blank(),
    panel.grid.major = element_blank()
  )

This chart just shows, for 2023, the expected number of years left of life for each age group.

for babies born in 1950 to 2023, plotted

births_lines = readLines("~/Documents/CSDE 533 Homeworks/sweden_births.rtf")
births_data_lines = births_lines[13:length(births_lines)]

births_cleaned_lines = gsub("\\\\", "", births_data_lines)

births_split_lines = strsplit(births_cleaned_lines, "\\s+")

births_sweden = data.frame(Year = integer(), Births = numeric())

for (line in births_split_lines) {
  if (length(line) >= 2) {

    year = trimws(line[1])
    births = trimws(line[2])
    
    births_sweden = rbind(births_sweden, data.frame(
      Year = as.integer(year),
      Births = as.numeric(births)
    ))
  }
}

births_sweden_raw <- births_sweden %>%
  filter(Year >= 1950 & Year <= 2023) %>% 
  mutate(Female_births = Births * .4886)

births_sweden_raw[is.na(births_sweden_raw)] <- 0
cohort_life_expectancies = data.frame(Year = integer(), e_x = numeric())

for (year in 1950:2023) {
  
  # get the T_0 value for this year
  Tx_0 = lifetable_sweden_raw %>%
    filter(Year == year & Age == 0) %>%
    pull(Tx)
  
  l0 = 100000

  # Calculate cohort life expectancy by dividing T_0 by l_0
  cohort_ex_val = Tx_0 / l0
  
  cohort_ex_val = round(cohort_ex_val, 2)
  cohort_life_expectancies = rbind(cohort_life_expectancies, 
                                 data.frame(Year = year, e_x = cohort_ex_val))
}

ggplot(cohort_life_expectancies, aes(x = Year, y = e_x)) +
  geom_line(color = "orchid4", linewidth = 1) +
  geom_point(color = "orchid4", size = 0.85) +
  scale_x_continuous(
    breaks = seq(1950, 2023, by = 10)
  ) +
  scale_y_continuous(
    breaks = seq(70, 90, by = 2.5)
  ) +
  labs(title = "Sweden's Cohort Life Expectancies for Total Population 1950-2023",
       x = "Year",
       y = "Life Expectancy (e_x)") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 10),
    legend.title = element_blank(),
    panel.grid.major = element_blank())

This next plot shows the expected years of life for each cohort of people born in each year along the x-axis. Sweden’s cohort life expectancy has been steadily rising since 1950, with a small dip in 2020 likely due to the COVID-19 pandemic. This dip is much lower than say, Chile’s dip in life expectancy, which I had plotted in an earlier homework. Differences in public policy, public health practices, the proportion of residents in urban (dense) regions, or the healthcare industry could be attributed to the difference in life expectancy dip due to COVID, between Sweden and Chile.

Annual Total Fertility Rate, Plotted

# get TFR for each year
tfr_by_year <- hfd_sweden_raw %>%
  group_by(Year) %>%  
  summarise(TFR = sum(ASFR, na.rm = TRUE))

ggplot(tfr_by_year, aes(x = Year, y = TFR)) +
  geom_line(color = "orchid4", linewidth = 1) +
  geom_point(color = "orchid4", size = 0.85) +
  scale_x_continuous(
    breaks = seq(1950, 2023, by = 10)
  ) +
  labs(title = "Sweden's Total Fertility Rate (TFR) 1950 - 2023",
       x = "Year",
       y = "Total Fertility Rate") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 10),
    legend.title = element_blank(),
    panel.grid.major = element_blank()
  )

Total fertility has seen jumps towards 2.0 in both 1990 and 2010, but the sharp dips between these spikes demonstrate the trend in decreasing annual TFR’s.

Net Migration Rate, Plotted

The Net Migration Rate for area i in time period t is defined as: \[CMP_{i,t} = \frac{I_{i,t} + O_{i,t}}{PY_{i,t}} \] Where \(I_{i,t}\) is the number of in-migrants for an area i in time period t (in this case, one year), \(O_{i,t}\) is the number of out-migrants for an area i in time period t, and \(PY_{i,t}\) is the number of person-years lived in that year t.

Information about the UN’s data source are as follows from the data dictionary + documentation:

Notes on Sweden Migrant Flow Data: “International migration data are derived from the population register. The statistics on immigration include citizens and foreigners arriving from abroad intending to stay in the country for a year or longer. Foreigners must hold a valid residence permit. The statistics on emigration include all foreigners and citizens leaving the country with the intention of staying abroad for one year or longer. The time criterion applied for emigration to another Nordic country depends on rules established by the receiving country.”

global_migration_flow <- read_excel(
  path = "~/Documents/CSDE 533 Homeworks/undesa_pd_2015_migration_flow_totals.xlsx", 
  skip = 16, 
  col_names = FALSE
)
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
## • `` -> `...22`
## • `` -> `...23`
## • `` -> `...24`
## • `` -> `...25`
## • `` -> `...26`
## • `` -> `...27`
## • `` -> `...28`
## • `` -> `...29`
## • `` -> `...30`
## • `` -> `...31`
## • `` -> `...32`
## • `` -> `...33`
## • `` -> `...34`
## • `` -> `...35`
## • `` -> `...36`
## • `` -> `...37`
## • `` -> `...38`
# make the column names the actual column names
colnames(global_migration_flow) <- as.character(global_migration_flow[1, ])
global_migration_flow <- global_migration_flow[-1, ]

# clean sweden in-migrant and out-migrant data
sweden_flow = global_migration_flow %>% 
  filter(CntName == "Sweden" & Criteria == "Residence") %>% 
  select(-CntName, -Criteria, -Type, -Coverage)

# combine with person-years data from the total population life table
lifetable_sweden_mig = lifetable_sweden_raw %>% 
  filter(Year >= 1980 & Year <= 2013) %>% 
  select(Year, Lx)

Lx_by_year <- lifetable_sweden_mig %>%
  group_by(Year) %>%  
  summarise(Annual_Lx = sum(Lx, na.rm = TRUE))

wide_annual_Lx = pivot_wider(Lx_by_year,
            names_from = Year,
            values_from = Annual_Lx)

sweden_flow = rbind(sweden_flow, wide_annual_Lx)

# pivot_longer() did not work at all
sweden_long_flow <- sweden_flow %>%
  slice(1:3) %>%
  t() %>%
  as.data.frame() %>%
  setNames(c("In_migrants", "Out_migrants", "Total_PY")) %>%
  mutate(Year = rownames(.)) %>%
  select(Year, In_migrants, Out_migrants, Total_PY) %>%
  mutate(Year = as.numeric(Year)) %>% 
  mutate(In_migrants = as.numeric(In_migrants)) %>% 
  mutate(Out_migrants = as.numeric(Out_migrants)) %>% 
  mutate(Total_PY = as.numeric(Total_PY)) %>% 
  mutate(Net_Mig = ((In_migrants - Out_migrants)/Total_PY) *1000)

# add in total swedish population to plot against, just to see
pop_sweden_mig = pop_sweden_raw %>% 
  filter(Year >= 1980 & Year <= 2013) %>% 
  select(Year, Total_Pop) %>% 
  mutate(Year = as.numeric(Year))

pop_by_year <- pop_sweden_mig %>%
  group_by(Year) %>%  
  summarise(Annual_pop = sum(Total_Pop, na.rm = TRUE)) 
  
sweden_long_flow = left_join(sweden_long_flow, pop_by_year, by = "Year")
ggplot(sweden_long_flow, aes(x = Year)) +
  geom_line(aes(y = Annual_pop, color = "Total Population"), linewidth = 1) +
  geom_line(aes(y = (Net_Mig - min(Net_Mig)) / (max(Net_Mig) - min(Net_Mig)) * (10e6 - 8e6) + 8e6, 
                color = "Net Migration Rate"), 
            linewidth = 1, linetype = "twodash") +
  scale_y_continuous(
    name = "Total Population",
    limits = c(8000000, 10000000),
    breaks = seq(8000000, 10000000, by = 500000),
    labels = scales::comma,
    sec.axis = sec_axis(~ (. - 8e6) / (10e6 - 8e6) * (max(sweden_long_flow$Net_Mig) - min(sweden_long_flow$Net_Mig)) + min(sweden_long_flow$Net_Mig),
                        name = "Net Migration Rate",
                        breaks = seq(-10, 0, by = 1))) +
  scale_x_continuous(
    breaks = seq(min(sweden_long_flow$Year), max(sweden_long_flow$Year), by = 10)) +
  labs(title = "Total Population and Net Migration Rate of Sweden",
       x = "Year") +
  scale_color_manual(values = c("Total Population" = "navy", "Net Migration Rate" = "orchid4")) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.title = element_blank())

Throughout the period for which there is migration flow data, there appears to be a variable, yet consistently negative, net migration rate. This indicates that more people are leaving Sweden each year than there are entering, regardless of whether those people are Swedish citizens or not.

Lower inflows of migrants could be one reason for Sweden’s dropping total fertility rate (and replacement rates) because migrants are typically of childbearing age.

Out of curiosity I checked international student counts for the past two decades (21st century only) and there are routinely about 30,000 international students coming to Sweden every year. The lowest was in the mid-2010’s at around 17,000. What was interesting was that the COVID-19 pandemic did not seem to dampen the amount of inflow of international students; the flow for 2019, 2020, 2021, and 2022 were all relatively similar. Maybe these students are not choosing to settle in Sweden, or perhaps this is just not enough to make a dent in the number of people leaving annually?

Plot uncertainty in projections where available. In words, summarize what happened to the population from 1950-2023 in regards to each parameter.

There were no uncertainty metrics (margins of error, data completeness, etc.) in the data from the UN, HMD, or HFD.

Does the total fertility rate appear to be in decline (Phase II) or has it already reached replacement/below-replacement fertility (Phase III)? If there are any big declines in life expectancy or population size, try to do a little bit of searching on the internet about your country and that time period, and report what may be behind these events.

This answer uses code from solutions to SOC/CSSS/CSDE 533 HW #3 provided by Jess Godwin.

Fertility Metrics in 2023

## TFR = \sum_x nFx * n
f_fab = 1/(1+0.4886)

nrr <- fertility_metrics_2023 %>% 
  mutate(nrr_term = ASFR*Female_Lx*f_fab/Female_lx[1]) %>% 
  pull(nrr_term) %>% sum()

tfr <- fertility_metrics_2023 %>% 
  mutate(tfr_term = 1*ASFR) %>% 
  pull(tfr_term) %>% sum()

## GRR = \sum_x nFx * n* f_fab
grr <- fertility_metrics_2023 %>% 
  mutate(grr_term = 1*ASFR*f_fab) %>% 
  pull(grr_term) %>% sum()

## MAB = mu =  \sum (nFx)(nLx)(x + n/2)/ \sum(nFx)(nLx)
mab_num <- fertility_metrics_2023 %>% 
  mutate(mab_num_term = ASFR*Female_Lx*x_half) %>% 
  pull(mab_num_term) %>% sum()

mab_den <- fertility_metrics_2023 %>% 
  mutate(x_half = Age + 1/2,
         mab_den_term = ASFR*Female_Lx) %>% 
  pull(mab_den_term) %>% sum()
mab <- mab_num/mab_den

data.frame(TFR = tfr,
           GRR = grr,
           NRR = nrr,
           MAB = mab) %>% 
  kbl(., digits = c(2, 2, 2, 1),
      row.names = FALSE,
      caption = "Sweden's TFR, GRR, NRR, and Mean Age at Birth in 2023") %>% 
  kable_styling(position = "center", full_width = FALSE)
Sweden’s TFR, GRR, NRR, and Mean Age at Birth in 2023
TFR GRR NRR MAB
1.45 0.98 0.97 31.6

Life Expectancy has steadily climbed 10 years from 72.5 at 1950 to around 85 years by present day. The Total Fertility Rate is below replacement level, at 1.45. Both GRR and NRR are below 1, which reflects both potential and actual population decline. There is potential population decline because women are having fewer daughters than replacement, and there is also actual population decline, because there are not enough babies total to replace the population. The mean age at childbirth is over 30 years old, consistent with that of Ireland (From my HW#4) and much higher than the global mean age at childbirth.

Finally, summarize what is projected to happen to these demographic measures by 2100.

Sweden is likely to experience continued population aging, with a shrinking working-age population. Due to population momentum, the decline may be more gradual rather than the initial metrics suggest. Even if fertility suddenly increased to replacement level, population decline might continue for a while because there aren’t enough people in younger age groups to immediately reverse the trend.

The long life expectancy will also keep overall population size where it is for some time. Any population shrinking expected by low total fertility rates would not happen for a while.

##(c) 20 points Construct population pyramids with the age-sex distribution of the population in 1950, 2000, 2050, and 2100. Comment on changes from 1950-2000, and projected changes in 2050 and 2100.

pop_sweden_1950 = pop_sweden_raw %>% 
  filter(Year == 1950) %>% 
  mutate(Popplot = -(Female_Pop / sum(Female_Pop))) %>% 
  mutate(Age = ifelse(row_number() == n(), 110, Age)) %>%
  select(Year, Age, Popplot) %>% 
  mutate(Type = "1950 Population Composition")

pop_sweden_2000 = pop_sweden_raw %>% 
  filter(Year == 2000) %>% 
  mutate(Popplot = Female_Pop / sum(Female_Pop)) %>% 
  mutate(Age = ifelse(row_number() == n(), 110, Age)) %>%
  select(Year, Age, Popplot) %>% 
  mutate(Type = "2000 Population Composition")

pop_sweden_negative = pop_sweden_2000 %>% 
  mutate(Popplot = -Popplot)

midway_projection = sweden_population_projections[28]
final_projection = sweden_population_projections[78]

pop_pyramid_midway = midway_projection %>% 
  mutate(Age = sweden_data_2023F$Age) %>% 
  mutate(Popplot = Projection_27_years / sum(Projection_27_years)) %>% 
  mutate(Year = 2050) %>% 
  mutate(Age = ifelse(row_number() == n(), 110, Age)) %>%
  select(Year, Age, Popplot) %>% 
  mutate(Type = "2050 Projected Pop. Composition")

pop_pyramid_midway_negative = pop_pyramid_midway %>% 
  mutate(Popplot = -Popplot)

pop_pyramid_final = final_projection %>% 
  mutate(Age = sweden_data_2023F$Age) %>% 
  mutate(Popplot = Projection_77_years / sum(Projection_77_years)) %>% 
  mutate(Year = 2100) %>% 
  mutate(Age = ifelse(row_number() == n(), 110, Age)) %>%
  select(Year, Age, Popplot) %>% 
  mutate(Type = "2100 Projected Pop. Composition")

sweden_pyrplot1 = rbind(pop_sweden_1950, pop_sweden_2000)
sweden_pyrplot2 = rbind(pop_sweden_negative, pop_pyramid_midway)
sweden_pyrplot3 = rbind(pop_sweden_negative, pop_pyramid_final)
sweden_pyrplot4 = rbind(pop_pyramid_midway_negative, pop_pyramid_final)
ggplot(sweden_pyrplot1, aes(x = as.numeric(Age), y = Popplot, fill = as.factor(Year))) +
  geom_bar(stat = "identity") +
  scale_x_continuous(
    breaks = seq(0, max(sweden_pyrplot1$Age), by = 10)
  ) +
  coord_flip() +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 10),
    legend.title = element_blank(),
    panel.grid.major = element_blank()
  ) +
  labs(
    x = "Age Group", y = "Female Population Porportion",  # x is Age, y is Female Population proportion
    fill = "Year", title = paste("Sweden Female Population Pyramid (", 1950, " vs. ", 2000, ")", sep = "")
  ) +
  scale_fill_manual(values = c("1950" = "slateblue", "2000" = "steelblue"))

ggplot(sweden_pyrplot2, aes(x = as.numeric(Age), y = Popplot, fill = as.factor(Year))) +
  geom_bar(stat = "identity") +
  scale_x_continuous(
    breaks = seq(0, max(sweden_pyrplot2$Age), by = 10)
  ) +
  coord_flip() +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 10),
    legend.title = element_blank(),
    panel.grid.major = element_blank()
  ) +
  labs(
    x = "Age Group", y = "Female Population Porportion",
    fill = "Year", title = paste("Sweden Female Population Pyramid (", 2000, " vs. ", 2050, ")", sep = "")
  ) +
  scale_fill_manual(values = c("2000" = "slateblue", "2050" = "steelblue"))

ggplot(sweden_pyrplot3, aes(x = as.numeric(Age), y = Popplot, fill = as.factor(Year))) +
  geom_bar(stat = "identity") +
  scale_x_continuous(
    breaks = seq(0, max(sweden_pyrplot3$Age), by = 10)
  ) +
  coord_flip() +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 10),
    legend.title = element_blank(),
    panel.grid.major = element_blank()
  ) +
  labs(
    x = "Age Group", y = "Female Population Porportion",
    fill = "Year", title = paste("Sweden Female Population Pyramid (", 2000, " vs. ", 2100, ")", sep = "")
  ) +
  scale_fill_manual(values = c("2000" = "slateblue", "2100" = "steelblue"))

ggplot(sweden_pyrplot4, aes(x = as.numeric(Age), y = Popplot, fill = as.factor(Year))) +
  geom_bar(stat = "identity") +
  scale_x_continuous(
    breaks = seq(0, max(sweden_pyrplot4$Age), by = 10)
  ) +
  coord_flip() +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 10),
    legend.title = element_blank(),
    panel.grid.major = element_blank()
  ) +
  labs(
    x = "Age Group", y = "Female Population Porportion",
    fill = "Year", title = paste("Sweden Female Population Pyramid (", 2050, " vs. ", 2100, ")", sep = "")
  ) +
  scale_fill_manual(values = c("2050" = "slateblue", "2100" = "steelblue"))

According to the Statistics Sweden, the national office of demographic data, 62.18% of the current Swedish population is between the ages 16-64. According to the projections for 2050 and 2100, the nation could see. a decrease in this large proportion of working-age individuals as the population ages.

##(d) 10 points Come up with a demographic question about your population that can be answered by another data source besides the WPP 2024. State the question and provide the answer in either table or figure format using the other dataset.

 Question: From 1990 - 2023, How do changes in air pollutant emissions from major chemical compounds in all industries compare to changes in total population, average annual mortality rate, and annual TFR?

 Data Source: Air pollutant data comes from the Swedish national Statistical Database, “Statistics Sweden” also known as the office of Official Statistics of Sweden. The website has an API query generator at the bottom of the above website.

 Licensing Info: “You may use Statistics Sweden’s open data free of charge. We use the licence CC0 for this data, which means that you may use and disseminate or provide such data without any requirement to state the source.”

 Note: “In 2022, large amounts of fossil gas leaked from the gas pipelines Nord Stream 1 and 2, which run through the Swedish economic zone. This leakage contained methane and other hydrocarbons, which are not included in the statistics in SSD. Emissions of volatile organic compounds (NMVOCs) from Nord Stream within the Swedish economic zone amounted to approximately 13 thousand tonnes, close to 10% of the total emissions of NMVOCs.”

In order to do the API call I wrote a script in Python because I have more API experience in Python than in R. That script can be found in the assignment’s zip file.

First we load the data and take a look at the different pollutants Sweden measures:

sweden_aq_data = read.csv("~/Documents/CSDE 533 Homeworks/sweden_aq.csv")

sweden_aq_data = sweden_aq_data %>% 
  mutate(Year = as.numeric(Year)) %>% 
  mutate(Emissions = as.numeric(Emissions)) %>% 
  select(-Sector) %>% 
  group_by(Pollutant)

pollutant_t_abbr = c("NOx", "NMVOC", "SO2", "NH3", "PM2.5", "PM10", 
                     "TSP", "BC", "CO")
pollutant_kg_abbr = c("Pb", "Cd", "Hg", "As", "Cr", 
                      "Cu", "Ni", "Se", "Zn", "DIOX", "benzo(a)", 
                      "benzo(b)", "benzo(k)", "Indeno", "PAH 1-4", 
                      "HCB", "PCB") 

# filter data by pollutants measured in tons
sweden_aq_data_tons = sweden_aq_data %>% 
  filter(Pollutant %in% pollutant_t_abbr)

# filter data by pollutants measured in kilograms
sweden_aq_data_kg = sweden_aq_data %>% 
  filter(Pollutant %in% pollutant_kg_abbr)

# Pollutant names for tons and kilograms
pollutant_t_names = c("Nitrogen oxide (t)", 
                      "Non methane volatile organic compounds (NMVOC) (t)", 
                      "Sulphur dioxide (t)", 
                      "Ammonia (NH3) (t)", 
                      "PM2.5 (t)", 
                      "PM10 (t)", 
                      "TSP (t)", 
                      "Black Carbon (BC) (t)", 
                      "Carbon Monoxide (CO) (t)")

pollutant_kg_names = c("Lead (Pb) (kg)", "Cadmium (Cd) (kg)", "Mercury (Hg) (kg)", "Arsenic (As) (kg)", 
                       "Chromium (Cr) (kg)", "Copper (Cu) (kg)", "Nickel (Ni) (kg)", "Selenium (Se) (kg)", 
                       "Zinc (Zn) (kg)", "Dioxin (g I-Teq)", "Benzo(a)pyrene (kg)", "Benzo(b)fluoranthene (kg)", 
                       "Benzo(k)fluoranthene (kg)","Indeno(1,2,3-cd)pyrene (kg)", "PAH 1-4 (kg)", "HCB (kg)", "PCB (kg)")

pollutant_tons_legend <- setNames(pollutant_t_names, pollutant_t_abbr)
pollutant_kg_legend <- setNames(pollutant_kg_names, pollutant_kg_abbr)

sweden_aq_data_tons$Pollutant <- factor(sweden_aq_data_tons$Pollutant, levels = pollutant_t_abbr)
sweden_aq_data_kg$Pollutant <- factor(sweden_aq_data_kg$Pollutant, levels = pollutant_kg_abbr)
ggplot(sweden_aq_data_tons, aes(x = Year, y = Emissions, color = Pollutant)) +
  geom_line(linewidth = 1) +
  geom_point(size = 0.85) +
  scale_x_continuous(breaks = seq(1990, 2023, by = 10)) +
  scale_y_continuous(
    limits = c(0, 1300000),
    breaks = seq(0, 1300000, by = 500000),
    labels = scales::comma
  ) +
  scale_color_viridis(discrete = TRUE, option = "mako", labels = pollutant_tons_legend) + 
  labs(
    title = "Sweden's Air Pollution Emissions in Tons (1990-2023)",
    x = "Year",
    y = "Emissions (tons)",
    color = "Pollutant"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 10),
    legend.title = element_blank(),
    panel.grid.major = element_blank()
  ) +
  guides(color = guide_legend(ncol = 1))

ggplot(sweden_aq_data_kg, aes(x = Year, y = Emissions, color = Pollutant)) +
  geom_line(linewidth = 1) +
  geom_point(size = 0.85) +
  scale_y_continuous(
    limits = c(0, 100000),
    breaks = seq(0, 100000, by = 10000),
    labels = scales::comma
  ) +
  scale_x_continuous(breaks = seq(1990, 2023, by = 10)) +
  scale_color_viridis(discrete = TRUE, option = "plasma", labels = pollutant_kg_legend) + 
  labs(
    title = "Sweden's Air Pollution Emissions in Kilograms (1990-2023)",
    x = "Year",
    y = "Emissions (kg)",
    color = "Pollutant"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 10),
    legend.title = element_blank(),
    panel.grid.major = element_blank()
  ) +
  guides(color = guide_legend(ncol = 1))

Then, we select the pollutants which overlap with those the World Health Organization cites as having the “strongest evidence for public health concern”: PM2.5, PM10, CO, and SO2. I will also be including Non methane volatile organic compounds (NMVOC).

WHO_pollutant_abbr = c("NMVOC", "SO2", "PM2.5", "PM10", "CO")

WHO_pollutant_names = c("Non methane volatile organic compounds (NMVOC) (t)", 
                   "Sulphur dioxide (t)", "PM2.5 (t)", 
                   "PM10 (t)", "Carbon Monoxide (CO) (t)")

sweden_aq_WHO = sweden_aq_data_tons %>% 
  filter(Pollutant %in% WHO_pollutant_abbr)

WHO_legend <- setNames(WHO_pollutant_names, WHO_pollutant_abbr)

ggplot(sweden_aq_WHO, aes(x = Year, y = Emissions, color = Pollutant)) +
  geom_line(linewidth = 1) +
  geom_point(size = 0.85) +
  scale_x_continuous(breaks = seq(1990, 2023, by = 10)) +
  scale_y_continuous(
    limits = c(0, 1300000),
    breaks = seq(0, 1300000, by = 500000),
    labels = scales::comma
  ) +
  scale_color_viridis(discrete = TRUE, option = "mako", labels = WHO_legend) + 
  labs(
    title = "Sweden's Emissions of 5 Pollutants in Tons 1990 - 2023",
    x = "Year",
    y = "Emissions (Tons)",
    color = "Pollutant"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 10),
    legend.title = element_blank(),
    panel.grid.major = element_blank()
  ) +
  guides(color = guide_legend(ncol = 1))

The first thing we can notice is that all of the five pollutants the WHO flags as posing the greatest risk to human health is that they are all decreasing, especially after the turn of the 21st century. It would be curious to see how these emissions compare, per capita, to a country of a higher population (and of a higher level of pollution) such as the United States, or China.

We combine the data with previously used metrics to begin plotting for just Sweden:

tfr_by_year_aq = tfr_by_year %>% 
  filter(Year >=1990 & Year <= 2023)

lifetable_sweden_aq = lifetable_sweden_raw %>% 
  filter(Year >=1990 & Year <= 2023) %>% 
  select(Year, Age, mx)

mx_by_year = lifetable_sweden_aq %>%
  group_by(Year) %>%  
  summarise(mx = mean(mx, na.rm = TRUE))

sweden_final = left_join(tfr_by_year_aq, mx_by_year, by = "Year")

sweden_aq_pivoted = sweden_aq_WHO %>%
  pivot_wider(names_from = Pollutant, values_from = Emissions)

pop_by_year_aq = pop_sweden_raw %>% 
  filter(Year >=1990 & Year <= 2023) %>% 
  select(Year, Total_Pop) %>% 
  group_by(Year) %>%  
  summarise(Annual_Pop = sum(Total_Pop, na.rm = TRUE))

sweden_final = left_join(sweden_final, sweden_aq_pivoted, by = "Year")
sweden_final = left_join(sweden_final, pop_by_year_aq, by = "Year")

Comparing against changes in total annual population:

ggplot(sweden_final, aes(x = Year)) +
  # Plot the air quality pollutants on the primary y-axis
  geom_line(aes(y = NMVOC, color = "NMVOC"), linewidth = 1) +
  geom_point(aes(y = NMVOC, color = "NMVOC"), size = 0.85) +
  
  geom_line(aes(y = SO2, color = "SO2"), linewidth = 1) +
  geom_point(aes(y = SO2, color = "SO2"), size = 0.85) +
  
  geom_line(aes(y = PM2.5, color = "PM2.5"), linewidth = 1) +
  geom_point(aes(y = PM2.5, color = "PM2.5"), size = 0.85) +
  
  geom_line(aes(y = PM10, color = "PM10"), linewidth = 1) +
  geom_point(aes(y = PM10, color = "PM10"), size = 0.85) +
  
  geom_line(aes(y = CO, color = "CO"), linewidth = 1) +
  geom_point(aes(y = CO, color = "CO"), size = 0.85) +
  
  geom_line(aes(y = (Annual_Pop - 8500000) * (3 / 7), color = "Population"), 
            linewidth = 1, linetype = "twodash", color = "orchid4") + 

  scale_x_continuous(breaks = seq(1990, 2023, by = 10)) +

  # primary y-axis (pollutant emissions)
  scale_y_continuous(
    limits = c(0, 1400000),
    breaks = seq(0, 1400000, by = 500000),
    labels = scales::comma,
    name = "Emissions (Tons)",
    
    # secondary y-axis (Annual Population)
    sec.axis = sec_axis(
      trans = ~ . * (7 / 3) + 8500000,
      name = "Population",
      labels = scales::comma_format()
    )
  ) +

  scale_color_viridis(discrete = TRUE, option = "mako", name = "Pollutant") +

  labs(
    title = "Sweden's Emissions of Top Pollutants in Tons and Annual Total Population (1990-2023)",
    x = "Year",
    color = "Legend"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 10),
    legend.title = element_blank(),
    panel.grid.major = element_blank()
  ) +
  guides(color = guide_legend(ncol = 1))

We can see in this graph the Annual Total Population in purple rising most sharply in the mid 2000’s, after a plateau through the late nineties. The rate of growth appears to be decreasing into the late 2010’s and early 2020’s. This is further supported by increasing migration outflows.

Comparing against changes in Average Annual Mortality Rate:

ggplot(sweden_final, aes(x = Year)) +
  # Plot the air quality pollutants on the primary y-axis
  geom_line(aes(y = NMVOC, color = "NMVOC"), linewidth = 1) +
  geom_point(aes(y = NMVOC, color = "NMVOC"), size = 0.85) +
  
  geom_line(aes(y = SO2, color = "SO2"), linewidth = 1) +
  geom_point(aes(y = SO2, color = "SO2"), size = 0.85) +
  
  geom_line(aes(y = PM2.5, color = "PM2.5"), linewidth = 1) +
  geom_point(aes(y = PM2.5, color = "PM2.5"), size = 0.85) +
  
  geom_line(aes(y = PM10, color = "PM10"), linewidth = 1) +
  geom_point(aes(y = PM10, color = "PM10"), size = 0.85) +
  
  geom_line(aes(y = CO, color = "CO"), linewidth = 1) +
  geom_point(aes(y = CO, color = "CO"), size = 0.85) +
  
  geom_line(aes(y = mx * 1e7, color = "Mortality Rate"), 
            linewidth = 1, linetype = "twodash", color = "orchid4") + 
  
  scale_x_continuous(breaks = seq(1990, 2023, by = 10)) +
  
  scale_y_continuous(
    limits = c(0, 1400000),
    breaks = seq(0, 1400000, by = 500000),
    labels = scales::comma,
    name = "Emissions (Tons)",
    
    sec.axis = sec_axis(
      trans = ~ . / 1e7,  
      name = "Mean Annaul Mortality Rate",
      breaks = seq(0.05, 0.15, by = 0.05),  
      labels = scales::number_format(scale = 1)  
    )
  ) +
  
  scale_color_viridis(discrete = TRUE, option = "mako", name = "Pollutant") +
  
  labs(
    title = "Sweden's Emissions of 5 Pollutants in Tons and Mean Annual Mortality Rate (1990-2023)",
    x = "Year",
    color = "Legend"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 10),
    legend.title = element_blank(),
    panel.grid.major = element_blank()
  ) +
  guides(color = guide_legend(ncol = 1))

Average annual mortality rates hover around 0.09 and 0.10, which squashes down the variability from the age-specific rates, which can vary as much as from one hundredth to nearly half of a tenth. An interesting part of this chart could be the drop in Carbon Monoxide (CO) in the late 1990s mirrors the drop in average annual mortality rate one or two years later. This cannot be linked as there may be confouding factors such as healthcare policy, changes in population composition, or changes in the economy or Sweden’s major industries. It’s still interesting and possibly worth exploring.

Comparing against changes in annual Total Fertility Rate (TFR):

ggplot(sweden_final, aes(x = Year)) +
  # Plot the air quality pollutants on the primary y-axis
  geom_line(aes(y = NMVOC, color = "NMVOC"), linewidth = 1) +
  geom_point(aes(y = NMVOC, color = "NMVOC"), size = 0.85) +
  
  geom_line(aes(y = SO2, color = "SO2"), linewidth = 1) +
  geom_point(aes(y = SO2, color = "SO2"), size = 0.85) +
  
  geom_line(aes(y = PM2.5, color = "PM2.5"), linewidth = 1) +
  geom_point(aes(y = PM2.5, color = "PM2.5"), size = 0.85) +
  
  geom_line(aes(y = PM10, color = "PM10"), linewidth = 1) +
  geom_point(aes(y = PM10, color = "PM10"), size = 0.85) +
  
  geom_line(aes(y = CO, color = "CO"), linewidth = 1) +
  geom_point(aes(y = CO, color = "CO"), size = 0.85) +
  
  geom_line(aes(y = (TFR - 1) * (1400000 / 1.5)), color = "orchid4", 
            linewidth = 1, linetype = "twodash") + 

  scale_x_continuous(breaks = seq(1990, 2023, by = 5)) +

  scale_y_continuous(
    limits = c(0, 1400000),
    breaks = seq(0, 1400000, by = 500000),
    labels = scales::comma,
    name = "Emissions (Tons)",
    
    sec.axis = sec_axis(
      trans = ~ . * (1.5 / 1400000) + 1,  
      name = "Total Fertility Rate",
      breaks = seq(1, 2.5, by = 0.5)
    )
  ) +

  scale_color_viridis(discrete = TRUE, option = "mako", name = "Pollutant") +

  labs(
    title = "Sweden's Emissions of Top Pollutants in Tons and Annual TFR (1990-2023)",
    x = "Year",
    color = "Legend"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 12),
    legend.title = element_blank(),
    panel.grid.major = element_blank()
  ) +
  guides(color = guide_legend(ncol = 1))

The last time Sweden was at replacement level (2.1 TFR) was in the early 1990’s. Decreases in TFR could be linked to an aging population (as seen in the population pyramid above comparing Sweden in 1950 vs. 2000) or also to decreasing net migration rates. Even if Sweden were to instantly rise to replacement level, the concept of population momentum may see a gradual decrease in population before the expected increase, due to the high proportion of older age groups in the population.